# Imports
import logging
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)
import numpy as np
import pandas as pd
import cmdstanpy
import arviz as az
import iqplot
import bebi103
import bokeh.io
import bokeh.plotting
# bokeh.io.output_notebook()
# Import seaborn for aesthetic plots
import seaborn as sns
from tqdm.notebook import tqdm
import pandas as pd
import ast
from bokeh.plotting import figure, show, curdoc
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColorBar
from bokeh.transform import linear_cmap
from bokeh.palettes import Viridis256
from bokeh.themes import Theme
from bokeh.layouts import column, row
bokeh.io.output_notebook()
import scipy as sp
import matplotlib.pyplot as plt
import scipy.stats as st
# Plotting params
size = 500;  # default figure dimension (px) -- presumably used by later plots; TODO confirm usage
Intel MKL WARNING: Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated. Intel oneAPI Math Kernel Library 2025.0 will require Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions. Intel MKL WARNING: Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated. Intel oneAPI Math Kernel Library 2025.0 will require Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.
/Users/mashok/opt/anaconda3/envs/bebi103/lib/python3.11/site-packages/dask/dataframe/_pyarrow_compat.py:23: UserWarning: You are using pyarrow version 11.0.0 which is known to be insecure. See https://www.cve.org/CVERecord?id=CVE-2023-47248 for further details. Please upgrade to pyarrow>=14.0.1 or install pyarrow-hotfix to patch your current version. warnings.warn(
Inference Formulation¶
Non Dimensionalised Model¶
Starting from the original equation:
\begin{align} \frac{d[ATP]}{dt} = - \gamma \cdot m \cdot \frac{[ATP]}{1 + \frac{ATP}{K_T} + \frac{ADP}{K_D} + \frac{P}{K_P}} \end{align}Since ATP $\rightleftharpoons$ ADP + P, denoting y $= [ATP]$,
\begin{align} \frac{dy}{dt} = - \gamma \cdot m \cdot \frac{y}{1 + \frac{y}{K_T} + \frac{ADP_o + y_o - y}{K_D} + \frac{P_o + y_o - y}{K_P}} \end{align}Where $ADP_o$ and $P_o$ refers to the initial concentrations of ADP and Phosphate respectively.
On reorganising and non-dimensionalizing,
\begin{align} K_{eff} = \frac{K_T*(1 + \frac{y_o + ADP_o}{K_D} + \frac{y_o + P_o}{K_P})}{\frac{1}{K_T} - \frac{1}{K_D} - \frac{1}{K_P}} \,, \end{align}\begin{align} \hat{t} = \frac{t * \gamma*m}{K_T*(1 + \frac{y_o + ADP_o}{K_D} + \frac{y_o + P_o}{K_P})} \,, \end{align}\begin{align} \hat{y} = \frac{y}{K_{eff}} \,, \end{align}leading to the following non-dimensionalised equation:
\begin{align} \frac{d\hat{y}}{d\hat{t}} = - \frac{\hat{y}}{1 + \hat{y}} \,, \end{align}The solution is given by:
\begin{align} [\,\, \hat{y} + ln(\hat{y}) \,\,]_{\hat{y}(0)}^{\hat{y}} = - \hat{t}\,, \end{align}\begin{align} \implies \hat{y} - \hat{y}(0) + ln(\hat{y}) - ln(\hat{y}(0)) = - \hat{t}\,, \end{align}\begin{align} \implies \hat{y} - \hat{y}(0) + ln(\frac{\hat{y}}{\hat{y}(0)}) = - \hat{t}\,, \end{align}[Side note: in the limit $K_{eff} \gg y_0$, i.e. $\hat{y} \ll 1$, the above equation reduces to an exponential decay.]
Bringing this to the Lambert function formulation,
\begin{align} \hat{y} + ln(\hat{y}) = \hat{y}(0) + ln(\hat{y}(0)) - \hat{t} = t'\,, \end{align}\begin{align} \implies \hat{y}\, e^{\hat{y}} = e^{t'} = x\,, \end{align}Since $x = e^{t'} > 0$ always (the exponential of a real number is positive),
\begin{align} \implies \hat{y} = W_o(x)\,, \end{align}where $W$ is the Lambert function.
Inference Set Up:¶
The data is of the form $\{y_i, t_i\}_{i = 1}^{N}$.
There is some uncertainty in the value of $y_i$ coming from the probe. Additionally, for each experiment, there is some initial unknown time delay $\tau$ coming from the delay in mounting on the microscope.
Note that in the equations above, there is some nonidentifiability in the parameters. We will reframe the effective Michaelis Menten constants as:
\begin{align} K_{eff} = \frac{K_T*(1 + \frac{y_o + ADP_o}{K_D} + \frac{y_o + P_o}{K_P})}{\frac{1}{K_T} - \frac{1}{K_D} - \frac{1}{K_P}} = C_1 * (1 + \frac{y_o + ADP_o}{K_D} + \frac{y_o + P_o}{K_P}) \,, \end{align}\begin{align} K_{time} = \frac{K_T*(1 + \frac{y_o + ADP_o}{K_D} + \frac{y_o + P_o}{K_P})}{\gamma*m} = \frac{C_2}{m} * (1 + \frac{y_o + ADP_o}{K_D} + \frac{y_o + P_o}{K_P}) \,, \end{align}where:
\begin{align} C_1 = \frac{K_T}{\frac{1}{K_T} - \frac{1}{K_D} - \frac{1}{K_P}} \,, \end{align}\begin{align} C_2 = \frac{K_T}{\gamma} \,. \end{align}Our parameters are $\theta = \{C_1, C_2, K_D, K_P, \tau\}$.
As derived above, our equation is:
\begin{align} \implies \hat{y} = W_o(x^{\theta})\,, \end{align}where $x = exp(\hat{y}(0) + ln(\hat{y}(0)) - \hat{t} - \hat{\tau})$, and $\hat{\tau} = \frac{\tau}{K_{time}}$. The superscript $\theta$ emphasizes the dependence of the function on the parameters to infer.
This allows us to model the likelihood as a normal distribution:
\begin{align} y_i \sim N(W_o(x_i^{\theta}), \sigma_y) \end{align}We can model our priors as:
\begin{align} K_T \sim N(\mu_T, \sigma_T) \end{align}\begin{align} K_D \sim N(\mu_D, \sigma_D) \end{align}\begin{align} K_P \sim N(\mu_P, \sigma_P) \end{align}\begin{align} \gamma \sim N(\mu_{\gamma}, \sigma_{\gamma}) \end{align}\begin{align} \tau \sim N(\mu_{\tau}, \sigma_{\tau}) \end{align}Note that at this moment, the parameter $\tau$ is the same for all experiments (this should later be generalised).
We will flatten all the data from all the experiments. We will assume each datapoint is independently and identically distributed. Then, the posterior becomes:
\begin{align} P(\theta \mid \{y_i\}_i^N) \,\, \propto \,\, P(\{y_i\}_i^N \mid \theta) * P(\theta) = \Pi_i^N P(y_i \mid \theta) * P(\theta) \end{align}Note above we are fixing the spread $\sigma_y$ instead of making it a parameter for simplicity at this moment.
Import Data¶
# Read the three analysed datasets (ATP / ADP / Phosphate titrations) into DataFrames.
data_location = '../../analyzed_data/atp-hydro/ATP.csv'
df1 = pd.read_csv(data_location)
data_location = '../../analyzed_data/atp-hydro/ADP.csv'
df2 = pd.read_csv(data_location)
data_location = '../../analyzed_data/atp-hydro/Phosphate.csv'
df3 = pd.read_csv(data_location)
#### ------------- Load and Read Data ------------- ####
def _flatten(nested):
    """Flatten one level of nesting: list of per-dataframe lists -> flat list indexed by curve."""
    return [item for sublist in nested for item in sublist]

ATP_conc_list = []
ADP_conc_list = []
P_conc_list = []
ATP_curve_list = []
ratio_curve_list = []
linear_r2_list = []
exponential_r2_list = []
linear_hydrolysis_rate_list = []
exponential_hydrolysis_rate_list = []
times_list = []
data_locations_list = []
# for df in [df1]:
# for df in [df1, df2, df3]:
for df in [df1, df2]:  # without phosphate data
    # Initial concentrations (uM)
    ATP_conc_list.append(np.array(df["ATP Concentration (uM)"]))
    ADP_conc_list.append(np.array(df["ADP Concentration (uM)"]))
    P_conc_list.append(np.array(df["P Concentration (uM)"]))
    # Time courses are stored as stringified Python lists -> parse with ast.literal_eval
    ATP_curve_list.append([ast.literal_eval(df["ATP Curve (uM)"][i]) for i in range(len(df))])
    ratio_curve_list.append([ast.literal_eval(df["Ratio (A.U.)"][i]) for i in range(len(df))])
    # Goodness-of-fit metrics for the two fitting procedures
    linear_r2_list.append(np.array(df["r-squared for linear fit"]))
    exponential_r2_list.append(np.array(df["r-squared for exponential fit"]))
    # Hydrolysis rates from the two fitting procedures
    linear_hydrolysis_rate_list.append(np.array(df["Hydrolysis Rate (uM/s/motor) from Linear Fitting (-abs(Slope)/Motconc)"]))
    exponential_hydrolysis_rate_list.append(np.array(df["Hydrolysis Rate (uM/s/motor) from Exponential Curve"]))
    # Time arrays (also stringified lists)
    times_list.append([ast.literal_eval(df["Time Array (s)"][i]) for i in range(len(df))])
    # Source file for each curve
    data_locations_list.append(df["Data Location"])
# Collapse the per-dataframe grouping so each list is indexed by curve.
times_list = _flatten(times_list)
ATP_conc_list = _flatten(ATP_conc_list)
ADP_conc_list = _flatten(ADP_conc_list)
P_conc_list = _flatten(P_conc_list)
ATP_curve_list = _flatten(ATP_curve_list)
ratio_curve_list = _flatten(ratio_curve_list)
linear_r2_list = _flatten(linear_r2_list)
exponential_r2_list = _flatten(exponential_r2_list)
linear_hydrolysis_rate_list = _flatten(linear_hydrolysis_rate_list)
exponential_hydrolysis_rate_list = _flatten(exponential_hydrolysis_rate_list)
data_locations_list = _flatten(data_locations_list)
# Quality control: build `estimation_data` (one dict per usable curve) and plot
# log-ATP (p) and raw ATP (p2) traces for a visual sanity check.
ATP_end = 5; # define noise floor: readings below this (uM) are treated as noise
start_index = 2; # throw away first few points
estimation_data = [];
p = figure(width = 400, height = 400)
p2 = figure(width = 400, height = 400)
for i, curve in enumerate(ATP_curve_list):
    conditions = np.zeros(2);
    #### Quality control of data
    # Get end index: first reading below the noise floor (or -1 = keep all but last)
    if len(np.where(np.array(curve) < ATP_end)[0]) != 0:
        end_index = np.where(np.array(curve) < ATP_end)[0][0]
    else:
        end_index = -1
    # Curve should have enough points
    if len(np.array(curve[start_index:end_index])) > 5:
        # Later atp measurements should not exceed initial atp measurement
        if np.all(np.array(curve[start_index + 1:]) < curve[start_index]):
            conditions[0] = 1;
        # Ensure initial ATP isn't too high
        if curve[start_index] < ATP_conc_list[i]:
            conditions[1] = 1;
        # NOTE(review): `conditions` is computed but never checked before the
        # append below, so curves failing the monotonicity / initial-ATP tests
        # are still included -- confirm whether filtering was intended here.
        # Append data
        estimation_data.append({
            "atp": np.array(curve[start_index:end_index]),
            "time": np.array(times_list[i])[start_index:end_index],
            "atp0": ATP_conc_list[i],
            "adp0": ADP_conc_list[i],
            "p0": P_conc_list[i],
        })
        p.line(np.array(times_list[i])[start_index:end_index]/60, np.log(np.array(curve[start_index:end_index])))
        p2.line(np.array(times_list[i])[start_index:end_index]/60, np.array(curve[start_index:end_index]))
show(gridplot([[p, p2]]))
data = pd.DataFrame(estimation_data)
# ECDFs of the initial-condition columns across all retained curves.
for _col in ("atp0", "adp0", "p0"):
    show(iqplot.ecdf(data[_col].values))
# Group unique conditions of atp0, adp0, p0
unique_conditions = np.unique(data[["atp0", "adp0", "p0"]].values, axis = 0)
# Visualise log-ATP curves per unique condition (`break` => first condition only).
for condition in unique_conditions:
    matches_atp0 = np.where(data["atp0"] == condition[0])[0]
    matches_adp0 = np.where(data["adp0"] == condition[1])[0]
    matches_p0 = np.where(data["p0"] == condition[2])[0]
    curve_indices = np.intersect1d(np.intersect1d(matches_atp0, matches_adp0), matches_p0)
    print(curve_indices)
    p = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM", width = 300, height = 300)
    for i in curve_indices:
        p.line(data["time"][i], np.log(data["atp"][i]))
    show(p)
    break
[12 17 42]
Predictive Prior Checks on Entire Dataset¶
### Prior predictive checks using numpy
# Define model
# def x_variable(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
# # Motor concentration
# m = 1;
# # Keff calculation
# # keff = KT * ( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP ) / ( (1/KT) - (1/KD) - (1/KP) );
# keff = C1*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
# # Ktime calculation
# # ktime = ((1/KT) - (1/KD) - (1/KP)) * keff / ( gamma * m );
# ktime = C2*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
# # print('keff', keff)
# # print('ktime', ktime)
# # Result
# # result = np.exp(atp0/keff + np.log(atp0/keff) - (time + tau)/ktime);
# result = (atp0/keff)*np.exp((atp0/keff) - (time + tau)/ktime)
# # print('x', result)
# return result
def x_lambert(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
    """Argument x of the Lambert-W solution of the non-dimensionalised model.

    Returns (atp0/keff) * exp(atp0/keff - (time + tau)/ktime), where
    keff = C1*(1 + (atp0+adp0)/KD + (atp0+p0)/KP) and ktime = C2*(same factor).
    Works elementwise when `time` is an array.
    """
    # Common concentration-dependent factor shared by keff and ktime.
    factor = 1 + (atp0 + adp0) / KD + (atp0 + p0) / KP
    keff = C1 * factor   # effective Michaelis-Menten scale
    ktime = C2 * factor  # time scale
    # NOTE(review): a dead intermediate in the original computed
    # exp(... + tau/ktime - time/ktime), i.e. the opposite sign on tau, before
    # being overwritten by the expression below (which matches
    # y_theoretical_simplified). Confirm the intended sign convention for tau.
    return (atp0 / keff) * np.exp(atp0 / keff - (time + tau) / ktime)
def tau_theoretical(atp, atp0, adp0, p0, C1, C2, KD, KP):
    '''
    Calculates theoretical time delay based on C1, C2, and initial conditions.
    Uses the first measured ATP value atp[0] against the nominal initial atp0.
    '''
    # Shared concentration-dependent factor for keff and ktime.
    scale = 1 + (atp0 + adp0) / KD + (atp0 + p0) / KP
    keff = C1 * scale
    ktime = C2 * scale
    # Delay implied by inverting the implicit solution at t = 0.
    return ((atp0 - atp[0]) / keff + np.log(atp0 / atp[0])) * ktime
def C2_theoretical(atp, atp0, adp0, p0, C1, tau, KD, KP):
    '''
    Calculates the C2 constant implied by a known time delay tau, C1, and the
    initial conditions (inverse of tau_theoretical).
    '''
    scale = 1 + (atp0 + adp0) / KD + (atp0 + p0) / KP
    keff = C1 * scale
    # Invert the delay relation for the time constant, then strip the
    # concentration-dependent factor to recover C2.
    ktime = tau / ((atp0 - atp[0]) / keff + np.log(atp0 / atp[0]))
    return ktime / scale
def y_theoretical(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
    """Theoretical ATP curve via the Lambert-W (principal branch) solution.

    Returns keff * W0(x); note scipy's lambertw returns complex values, so
    callers take `.real` (as done elsewhere in this notebook).

    Raises:
        Exception: if any element of x is not strictly positive, where W0
            would not give the physical branch.
    """
    # Keff calculation
    keff = C1*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
    x = x_lambert(time, atp0, adp0, p0, C1, C2, KD, KP, tau);
    # Use `not` rather than bitwise `~`: `~` only happens to work here because
    # np.all returns np.bool_ (on a Python bool it would always be truthy).
    if not np.all(x > 0):
        raise Exception("X is not greater than zero for the lambert function calculation")
    result = keff*sp.special.lambertw(x, k = 0);
    return result
def y_theoretical_limit(time, atp0, adp0, p0, C1, C2, tau):
    """Lambert-W solution in the limit where the KD/KP terms drop out, so
    keff = C1 and ktime = C2 directly. Returns keff * W0(x) (complex dtype)."""
    keff = C1
    ktime = C2
    # Non-dimensionalised initial concentration, time, and delay.
    y0_hat = atp0 / keff
    x = np.exp(y0_hat + np.log(y0_hat) + tau / ktime - time / ktime)
    if np.any(x < 0):
        print(x)
        raise Exception("X is not greater than zero for the lambert function calculation")
    return keff * sp.special.lambertw(x, k = 0)
def y_theoretical_simplified(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
    '''
    Exponential limit of the model: when Keff is much larger than the ATP
    concentration, the theoretical curve reduces to
        y = y0 * exp(-(t + tau) / Ktime).
    '''
    ktime = C2 * (1 + (atp0 + adp0) / KD + (atp0 + p0) / KP)
    return atp0 * np.exp(-(time + tau) / ktime)
# Accumulators for prior-predictive draws (filled by the commented-out loop below).
theoretical_curves_list = [];
likelihoods_list = [];
def y_exponential(time, tau, atp0, C):
    """Pure exponential decay with time constant C and initial delay tau."""
    return atp0 * np.exp(-(time + tau) / C)
# for _ in range(50):
# # C1 = np.random.normal(500, 100);
# # C2 = np.random.normal(500, 100);
# # KD = np.random.normal(500, 100);
# # KP = np.random.normal(500, 100);
# # tau = np.random.normal(500, 100);
# C1 = np.random.uniform(low = 10, high = 1000);
# C2 = np.random.uniform(low = 10, high = 1000);
# KD = np.random.uniform(low = 10, high = 1000);
# KP = np.random.uniform(low = 10, high = 1000);
# tau = np.random.uniform(low = 0, high = 1000);
# # Draw data sets out of the likelihood for each set of prior params
# theoretical_atp = np.array([y_theoretical(time, atp0, adp0, p0, C1, C2, KD, KP, tau) for time, atp0, adp0, p0 in zip(data["time"], data["atp0"], data["adp0"], data["p0"])]).real
# likelihoods = sp.stats.norm.pdf(data["atp"], loc=theoretical_atp, scale=1)
# theoretical_curves_list.append(theoretical_atp)
# likelihoods_list.append(likelihoods)
Simplified Exponential¶
Earlier we noted that in the limit $K_{eff} \gg y_o$, the Lambert-function solution reduces to an exponential function:
\begin{align} y = y_o e^{-(t + \tau)/{K_{time}}} \end{align}Where we can calculate $\tau$ as:
\begin{align} y(t = 0) = y_o e^{-(0 + \tau)/{K_{time}}} \end{align}or
\begin{align} log(\frac{y(t = 0)}{y_o}) = -(\tau)/{K_{time}} \end{align}\begin{align} \implies \tau = - K_{time}* log(\frac{y(t = 0)}{y_o}) \end{align}Why might we operate in this limit in the first place? Consider $K_T = 30$, $K_D = 50$, $K_P = 900$ (values for kinesin motors). Then the prefactor $\frac{K_T}{\frac{1}{K_T} - \frac{1}{K_D} - \frac{1}{K_P}} \approx 2455\,\mu M$, so $K_{eff}$ is large compared to typical initial ATP concentrations and the exponential limit applies.
# Reference parameter values for the exponential-limit argument above.
KT = 10;
KD = 100;
KP = 900;
gamma = 1;
# NOTE(review): the markdown above defines C1 = KT/(1/KT - 1/KD - 1/KP) and
# C2 = KT/gamma; the two assignments below appear swapped relative to that --
# confirm which convention the downstream cells (ktime = C2*(...)) assume.
C1 = KT/gamma;
C2 = KT/((1/KT) - (1/KD) - (1/KP));
print(C1)
print(C2)
# Exponential-limit prior check: for each curve, draw Ktime from a prior,
# back out tau from the first measurement, and overlay theoretical log-ATP
# lines (orange) on the data.
tau_array = np.zeros(len(data))
p1 = figure();
p2 = figure();
p3 = figure();
n_draws = 10;
ktime_array = np.random.normal(loc = 100, scale = 10, size = n_draws);
t_end_index = 100;  # truncate every curve to its first 100 points
slopes = np.zeros(len(data));
for i, row in data.iterrows():
    # NOTE(review): these three grab the *whole column* (a Series) rather than
    # this row's scalars (row["atp0"] etc.); the keff/ktime computed from them
    # are Series, but ktime is overwritten by the scalar draw below and keff is
    # only referenced in commented-out code -- buggy but currently inert.
    atp0 = data["atp0"];
    adp0 = data["adp0"];
    p0 = data["p0"];
    # keff = C1*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
    ktime = C2*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
    keff = C1*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
    time = row["time"][:t_end_index]/60;  # seconds -> minutes
    p1.line(time, row["atp"][:t_end_index]);
    p2.line(row["time"][:t_end_index]/60, np.log(row["atp"][:t_end_index]));
    # roughly calculate slope of experimental line (log-ATP per second)
    if len(row["atp"]) > 5:
        index = 5;
    else:
        index = len(row["atp"]) - 1;
    slopes[i] = (np.log(row["atp"][index]) - np.log(row["atp"][0]))/(row["time"][index] - row["time"][0])
    for j in range(n_draws):
        # Sampled prior draw for Ktime; tau follows from y(t=0) (see markdown above)
        ktime = ktime_array[j]
        tau = - ktime*np.log(row["atp"][0]/row["atp0"]);
        tau_array[i] = - ktime*np.log(row["atp"][0]/row["atp0"]);  # only the last draw is kept
        # NOTE(review): `time` is in minutes while tau/ktime come from the draw --
        # confirm the intended time units are consistent here.
        log_atp_theoretical = np.log(row["atp0"]) - (time + tau)/ktime;
        p2.line(time, log_atp_theoretical, color = "orange");
    # p3.line(row["time"][:t_end_index]/ktime, row["atp"][:t_end_index]/keff + np.log(row["atp"][:t_end_index]/keff));
p1.xaxis.axis_label = "mins"
p1.yaxis.axis_label = "ATP [uM]"
show(p1)
p2.xaxis.axis_label = "mins"
p2.yaxis.axis_label = "log ATP [uM]"
show(p2)
# p3.xaxis.axis_label = "ND time"
# p3.yaxis.axis_label = "y + log y"
# show(p3)
print(tau_array/60)
show(iqplot.ecdf(slopes))
print(np.mean(slopes))
Prior predictive using Stan¶
# Compile the prior-predictive Stan model (logging suppressed).
with bebi103.stan.disable_logging():
    sm_prior_pred = cmdstanpy.CmdStanModel(
        stan_file="./stan_files/simplified_stan_ppc.stan"
    )
N = len(data["atp0"]);  # number of curves
M = 10;  # number of time points per predictive curve
stan_data = {
    "N": N,
    "M": M,
    # First measured ATP value of each curve, y(tau)
    "atp_t_0": np.array([curve[0] for curve in data["atp"].values]),
    "atp0": data["atp0"],
    "adp0": data["adp0"],
    "p0": data["p0"],
    # Evenly spaced evaluation times on [0, 10000) seconds
    "t": np.arange(0, 10000, 10000/M)
}
# fixed_param=True: no posterior exploration, just generated-quantities draws.
with bebi103.stan.disable_logging():
    samples = sm_prior_pred.sample(
        data=stan_data,
        iter_sampling=1000,
        fixed_param=True,
        chains=1,
        show_progress=False,
        show_console=False,
    )
# Wrap as ArviZ InferenceData, exposing atp_ppc and Ktime as prior predictive.
samples = az.from_cmdstanpy(prior=samples, prior_predictive=["atp_ppc", "Ktime"])
samples
samples.prior["tau"].values.shape
show(iqplot.ecdf(data["atp0"].values))
print(len(data["atp0"].values))
print(len(data["atp0"][data["atp0"] < 200]))
show(iqplot.ecdf(samples.prior["tau"].values.flatten()))
print("mean", np.mean(samples.prior["tau"].values.flatten()))
# Visualise distribution of Ktime
show(iqplot.ecdf(samples.prior_predictive["Ktime"].values.flatten()))
unique_conditions
samples
unique_conditions
# Overlay prior-predictive mean +/- 1 s.d. bands on hand-selected conditions.
handselected_i = [6, 7, 8, 9, 21, 22, 23]  # manually chosen condition indices
# NOTE(review): `i` below is left over from an earlier loop, so this prints an
# arbitrary condition -- confirm the intended index.
print(unique_conditions[i])
for index in handselected_i:
    condition = unique_conditions[index]
    p = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM")
    # NOTE(review): `curve_indices` was computed in an earlier cell (for the
    # *first* unique condition only) and is not derived from `index` here --
    # the same experimental curves are overlaid on every plot; confirm intent.
    for i in curve_indices:
        # Plot experimental data
        p.line(data["time"][i], data["atp"][i], color = "blue")
        # Summarise the prior predictive for curve i
        mean_ppc = np.mean(samples.prior_predictive["atp_ppc"].values[0, :, i, :], axis = 0);
        std_ppc = np.std(samples.prior_predictive["atp_ppc"].values[0, :, i, :], axis = 0);
        print("mean", mean_ppc.shape, std_ppc.shape)
        # Shift predictive times so they start at the curve's first measurement time
        source = bokeh.plotting.ColumnDataSource({
            'base':stan_data["t"] + data["time"][i][0],
            'lower':mean_ppc - std_ppc,
            'upper':mean_ppc + std_ppc
        })
        p.line(stan_data["t"] + data["time"][i][0], mean_ppc, color = "orange")
        band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                                 source=source, fill_alpha=0.5)
        p.add_layout(band)
    p.xaxis.axis_label = "time (s)";
    show(p)
    # break
# Stand-alone Bokeh "rolling std band" demo (adapted from the Bokeh docs),
# kept as a reference for drawing Band glyphs.
# NOTE(review): this cell rebinds `df` and `df2`, clobbering the dataframes
# loaded at the top of the notebook -- harmless only if nothing downstream
# re-reads them; confirm.
import numpy as np
import pandas as pd
from bokeh.models import Band, ColumnDataSource
from bokeh.plotting import figure, show
# Create some random data
x = np.random.random(2500) * 140 +20
y = np.random.normal(size=2500) * 2 + 6 * np.log(x)
df = pd.DataFrame(data=dict(x=x, y=y)).sort_values(by="x")
# Rolling mean / std over a 300-sample window.
# NOTE(review): dict-renaming in rolling().agg and passing raw np.mean/np.std
# are deprecated in recent pandas -- confirm against the installed version.
df2 = df.y.rolling(window=300).agg({"y_mean": np.mean, "y_std": np.std})
df = pd.concat([df, df2], axis=1)
df["lower"] = df.y_mean - df.y_std
df["upper"] = df.y_mean + df.y_std
source = ColumnDataSource(df.reset_index())
p = figure(tools="", toolbar_location=None, x_range=(40, 160))
p.title.text = "Rolling Standard Deviation"
p.xgrid.grid_line_color=None
p.ygrid.grid_line_alpha=0.5
p.scatter(x="x", y="y", color="blue", marker="dot", size=10, alpha=0.4, source=source)
p.line("x", "y_mean", line_dash=(10, 7), line_width=2, source=source)
band = Band(base="x", lower="lower", upper="upper", source=source,
            fill_alpha=0.3, fill_color="yellow", line_color="black")
p.add_layout(band)
show(p)
# Overlay all 1000 prior-predictive draws (orange) on the experimental curve
# (blue); `break` stops after the first curve.
for i, condition in enumerate(data["atp0"]):
    p2 = figure();
    # Plot ATP samples
    for j in range(1000):
        p2.line(stan_data["t"], samples.prior_predictive["atp_ppc"].values[0, j, i, :], color = "orange")
    p2.line(data["time"][i], data["atp"][i], color = "blue")
    show(p2)
    break
# Shape of the prior-predictive draws for curve 1: (n_draws, M)
samples.prior_predictive["atp_ppc"].values[0, :, 1, :].shape
By Transforming Data¶
The ATP signal y is transformed to $log(\frac{y}{y(\tau)}) + \frac{y}{K_{eff}}$ to avoid having to use the Lambert function.
Data Selection¶
# Prepare data: keep every curve that is not manually excluded and that has
# more than 5 time points.
# remove_indices = [12, 26, 27, 46]; # manually select curves to not include
remove_indices = []
atp_array = []
ytau_array = []
atp0_array = []
adp0_array = []
p0_array = []
time_array = []
size_array = []
j = 0
for i, row in data.iterrows():
    # Guard clauses: skip excluded or too-short curves.
    if i in remove_indices:
        continue
    if len(row["time"]) <= 5:
        continue
    atp_array.append(row["atp"])
    ytau_array.append(row["atp"][0])  # first measured ATP value, y(tau)
    atp0_array.append(row["atp0"])
    adp0_array.append(row["adp0"])
    p0_array.append(row["p0"])
    time_array.append(row["time"])
    size_array.append(len(row["atp"]))
    j += 1  # count of retained curves
print(len(atp_array))
58
# Largest nominal initial ATP concentration -- presumably informs the
# max_value default (1420) in generate_hex_color below; TODO confirm.
print(np.amax(ATP_conc_list))
def generate_hex_color(value, min_value = 0, max_value = 1420):
    """
    Generate a hexadecimal color based on the input value within a specified range.

    Args:
        value (float): Input value for color generation.
        min_value (float): Minimum value of the range.
        max_value (float): Maximum value of the range.

    Returns:
        tuple: (Hexadecimal color code, normalized value)
    """
    # Normalize the value within the range -- colormaps expect input in [0, 1]
    normalized_value = (value - min_value) / (max_value - min_value)
    # BUG FIX: the colormap was previously fed the raw `value`, which saturates
    # at the top colour for any value > 1 and ignores min_value/max_value;
    # use the normalized value so the range parameters actually matter.
    rgb_color = [int(x * 255) for x in plt.cm.viridis(normalized_value)[:3]]  # Viridis colormap; swappable
    # Convert RGB to hexadecimal color
    hex_color = "#{:02x}{:02x}{:02x}".format(*rgb_color)
    return hex_color, normalized_value
# Sample data (only `values` feeds the min/max below; x and y are unused here)
x = np.linspace(0, 10, 100)
y = np.sin(x)
values = np.random.rand(100) * 20 # Sample values
# Define the range
min_value = np.min(values)
max_value = np.max(values)
# Generate colors and normalized values for each curve's initial ATP.
# NOTE(review): min/max come from the random `values` (0..20), but colours are
# built from `atp0_array` (up to ~1400 uM) -- the ranges don't match; confirm intent.
colors, normalized_values = zip(*[generate_hex_color(value, min_value, max_value) for value in atp0_array])
# Split curves at the 3-hour mark and plot raw ATP traces for each group.
p = figure(title = "Long-Time Experiments", height = 300, width = 300)
ps = figure(title = "Short-Time Experiments", height = 300, width = 300)
for fig in (p, ps):
    fig.xaxis.axis_label = "Time (hrs)"
    fig.yaxis.axis_label = "ATP (uM)"
for i in range(len(atp0_array)):
    hours = time_array[i]/3600
    # Curves lasting longer than 3 hours go on the long-time panel.
    target = p if hours[-1] > 3 else ps
    target.line(hours, atp_array[i], alpha=0.7)
# Show the plot
show(gridplot([[ps, p]]))
# Same long/short split, but on the log of the ATP signal.
p = figure(title = "Long-Time Experiments", height = 300, width = 300)
ps = figure(title = "Short-Time Experiments", height = 300, width = 300)
for fig in (p, ps):
    fig.xaxis.axis_label = "Time (hrs)"
    fig.yaxis.axis_label = "log[ATP] (uM)"
for i in range(len(atp0_array)):
    hours = time_array[i]/3600
    target = p if hours[-1] > 3 else ps
    target.line(hours, np.log(atp_array[i]), alpha=0.7)
# Show the plot
show(gridplot([[ps, p]]))
# First measured ATP vs nominal initial ATP, with a unity reference line:
# points below the line indicate ATP lost before the first measurement.
p = figure(height = 300, width = 300)
p.yaxis.axis_label = "First ATP Measurement (uM)"
p.xaxis.axis_label = "Initial ATP Concentration (uM)"
for nominal, curve in zip(atp0_array, atp_array):
    p.circle(nominal, curve[0], alpha=0.7, legend_label = "Data")
unity = np.arange(0, 1000, 100)
p.line(unity, unity, color = "black", line_dash="dashed", legend_label = "Unity Line")
show(p)
Prior Predictive Check (Numpy)¶
# add more of the tail of the data! 50 uM is too high a cutoff
def parameter_calculation(ytau, atp0, adp0, p0, C1, KT, KD, gamma):
    """Compute (keff, ktime) with the phosphate (KP) term deliberately dropped.

    ytau and p0 are accepted for interface compatibility but are unused in
    this KP-free variant.
    """
    m = 1  # motor concentration; constant for the current data set
    scale = 1 + (atp0 + adp0) / KD  # KP term ignored
    keff = C1 * scale
    ktime = KT * scale / (gamma * m)
    return keff, ktime
def calculate_lambert_argument(t, tau, ytau, keff, ktime):
    """x = exp(y(tau)/keff + log(y(tau)/keff) + (tau - t)/ktime)."""
    scaled = ytau / keff
    return np.exp(scaled + np.log(scaled) + (tau - t) / ktime)
def calculate_lambert_argument_no_tau(t, t1, y1, keff, ktime):
    """Same as calculate_lambert_argument but anchored at the first measured
    point (t1, y1) instead of an explicit delay tau."""
    scaled = y1 / keff
    return np.exp(scaled + np.log(scaled) + (t1 - t) / ktime)
# Prior set-up for the numpy prior-predictive draws below.
m = 1;  # motor concentration (constant for this dataset)
mu_KT = 10
mu_KD = 20
mu_KP = 10000
# Mean of C1 = 1/(1/KT - 1/KD) (KP dropped); clipped to 0 if negative
mu_C1 = 1/((1/mu_KT) - (1/mu_KD))
print("mean C1: ", mu_C1)
if mu_C1<0:
    mu_C1=0
n_draws = 1000;
inv_KT_array = np.zeros(n_draws);  # filled per-draw in the loop below
# NOTE(review): sd=1 around 1/mu_KD=0.05 makes most draws negative (KD < 0) --
# confirm the intended scale of this prior.
inv_KD_array = np.random.normal(1/mu_KD, 1, size = n_draws); # note: best guess for KT for NcD motors acc to gliding data is 30 uM.
# NOTE(review): inv_C1_array is never populated (stays all zeros); any later
# use of 1/inv_C1_array[j] divides by zero -- confirm this is intentional.
inv_C1_array = np.zeros(n_draws);
gamma_array = np.random.uniform(0.001, 0.5, size = n_draws); # centre at 0.5. Go higher, to 10.
# declare KT array
KT_array = np.zeros(n_draws);
# to store keff and ktime samples
keff_array = np.zeros(n_draws);
ktime_array = np.zeros(n_draws);
# tau_array = np.random.uniform(0, 15, size = n_draws)*60;
# tau_array = np.random.uniform(0, 15, size = n_draws)*60;
# For each experiment, draw prior parameter samples and overlay the implied
# theoretical ATP trajectories on the measured curve.
# NOTE: the `break` at the bottom means only the FIRST experiment is processed.
for i in range(len(atp_array)):
    # Get initial concentrations according to experimental plan
    experimental_atp0 = atp0_array[i];
    experimental_adp0 = adp0_array[i];
    experimental_p0 = p0_array[i]*1e3;  # presumably mM -> uM conversion; confirm units
    atp0 = experimental_atp0;
    adp0 = experimental_adp0;
    p0 = experimental_p0;
    atp = atp_array[i];
    # ytau = ytau_array[i];
    time = time_array[i];
    y1 = atp[0];   # first measured ATP value of this experiment
    t1 = time[0];  # time of the first measurement
    # One theoretical trajectory per prior draw (rows: time points, cols: draws).
    atp_theoretical_array = np.zeros((len(time), n_draws))
    p = figure()
    p2 = figure(title = f"ATP0 = {experimental_atp0}, ADP0 = {experimental_adp0}, P0 = {experimental_p0}")
    for j in range(n_draws):
        # Assume some error
        # atp0 = np.random.normal(experimental_atp0, 100);
        # adp0 = np.random.normal(experimental_adp0, 100);
        # p0 = np.random.normal(experimental_p0, 100);
        # print("atp0, adp0, p0", atp0, adp0, p0)
        # ---------------- SAMPLE sum ---------------- #
        #Sample C1
        inv_KT = np.random.normal(1/mu_KT, 1);
        # Feasibility bound on inv_KT implied by requiring the trajectory to
        # reach y1 by t1 + 600 s given this draw's gamma and inv_KD.
        # NOTE(review): the variable is named "lower_bound" but the clamp below
        # caps inv_KT from ABOVE — confirm which direction is intended.
        inv_KT_lower_bound = ((atp0 - y1)*inv_KD_array[j]/(gamma_array[j]*m) - (1 + (atp0 + adp0)*inv_KD_array[j])*np.log(atp0/y1))/(t1 + 600 - (atp0 - y1)/(gamma_array[j]*m))
        # Bound samples inv_KT value
        if inv_KT > inv_KT_lower_bound:
            inv_KT = inv_KT_lower_bound
        # Save sampled KT value
        inv_KT_array[j] = inv_KT;
        # ---------------- CALCULATE KT, KEFF, KTIME ---------------- #
        # KT_array[j] = 1/((1/C1_array[j]) + (1/KD_array[j]) + (1/KP_array[j]));
        # KT_array[j] = 1/(inv_C1_array[j] + inv_KD_array[j]);
        KT_array[j] = 1/inv_KT;
        # tau = np.random.uniform(0, 60)*60;
        # NOTE(review): inv_C1_array is all zeros (see setup), so the C1
        # argument 1/inv_C1_array[j] is inf here — confirm intended.
        keff, ktime = parameter_calculation(y1, atp0, adp0, p0, 1/inv_C1_array[j], KT_array[j], 1/inv_KD_array[j], gamma_array[j])
        keff_array[j] = keff;
        ktime_array[j] = ktime;
        # print("C1", 1/inv_C1)
        print("KT", KT_array[j])
        print("KD", 1/inv_KD_array[j])
        print("Keff", keff)
        print("Ktime", ktime)
        # ---------------- CALCULATE THEORETICAL ATP ---------------- #
        x = calculate_lambert_argument_no_tau(time, t1, y1, keff, ktime)
        atp_theoretical = keff*sp.special.lambertw(x).real; #gives solution for branch k = 0.
        # Add iid Gaussian measurement noise (sd = 50) to the theoretical curve.
        atp_theoretical_array[:, j] = np.random.normal(atp_theoretical, 50);
    # Pointwise summaries over the prior draws.
    mean_atp_theoretical = np.mean(atp_theoretical_array, axis = 1)
    std_atp_theoretical = np.std(atp_theoretical_array, axis = 1)
    median_atp_theoretical = np.median(atp_theoretical_array, axis = 1)
    percentile25_theoretical = np.percentile(atp_theoretical_array, axis = 1, q = 25);
    percentile75_theoretical = np.percentile(atp_theoretical_array, axis = 1, q = 75);
    # Shade std
    source = bokeh.plotting.ColumnDataSource({
        'base': time,
        'lower':mean_atp_theoretical - std_atp_theoretical,
        'upper':mean_atp_theoretical + std_atp_theoretical
    })
    band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                             source=source, fill_alpha=0.5, fill_color = "orange")
    p2.add_layout(band)
    p2.line(time, percentile75_theoretical, color = "orange", legend_label = "75th Percentile");
    p2.line(time, mean_atp_theoretical, color = "darkorange", legend_label = "Mean");
    p2.line(time, median_atp_theoretical, color = "maroon", legend_label = "Median");
    p2.line(time, percentile25_theoretical, color = "red", legend_label = "25th Percentile");
    p2.line(time, atp, color = "black", legend_label = "Experimental Data");
    # NOTE(review): `grid` is built but never shown — only p2 is displayed.
    grid = gridplot([[p, p2]]);
    show(p2)
    break
# if i == 1:
#     break
# p3.line(row["time"][:t_end_index]/ktime, row["atp"][:t_end_index]/keff + np.log(row["atp"][:t_end_index]/keff));
# p1.xaxis.axis_label = "mins"
# p1.yaxis.axis_label = "ATP [uM]"
# show(p1)
# p.xaxis.axis_label = "seconds"
# p.yaxis.axis_label = "log ATP [uM]"
# show(p)
# p3.xaxis.axis_label = "ND time"
# p3.yaxis.axis_label = "y + log y"
# show(p3)
# Plot parameter spaces sampled (ECDFs of the prior draws).
show(iqplot.ecdf(keff_array, title = "keff"))
show(iqplot.ecdf(ktime_array, title = "Ktime"))
show(iqplot.ecdf(KT_array, title = "KT"))
# NOTE(review): KD_array and tau_array are not assigned in this section (only
# inv_KD_array is sampled, and the tau_array draw above is commented out) —
# the next two calls rely on stale kernel state and will NameError on a fresh run.
show(iqplot.ecdf(KD_array, title = "KD"))
show(iqplot.ecdf(gamma_array, title = "Gamma"))
show(iqplot.ecdf(tau_array, title = "Tau"))
Prior Predictive Check (Stan)¶
# Number of experiments (first ATP reading exists for each curve).
len([row[0] for row in data["atp"].values])
# Prepare data for the prior predictive Stan model.
t_end = 10000;              # total simulated time (seconds)
prior_check_indices = 1;    # only the first experiment is checked here
model_data = {
    "N": int(t_end/100),    # number of simulated time points (100 s spacing)
    "time": np.arange(0, t_end, 100),
    "M": len(data[:prior_check_indices]),  # number of experiments passed to Stan
    # First measured ATP value of each selected curve.
    "ytau": np.array([row[0] for row in data["atp"].values[:prior_check_indices]]),
    "atp0": data["atp0"].values[:prior_check_indices],
    "adp0": data["adp0"].values[:prior_check_indices],
    "p0": data["p0"].values[:prior_check_indices],
}
# Load stan file
sm = cmdstanpy.CmdStanModel(stan_file='./stan_files/prior_predictive_checks_transformed.stan')
# Sample chains (single chain; prior predictive only needs independent draws).
iteration_samples_prior = 100;
samples = sm.sample(
    data=model_data,
    chains=1,
    iter_sampling=iteration_samples_prior,
    show_console= True
)
# Wrap as ArviZ InferenceData, exposing log_y_ppc as the prior-predictive group.
samples = az.from_cmdstanpy(prior=samples, prior_predictive=["log_y_ppc"])
# Plot prior-predictive draws (orange) against the transformed experimental
# data (blue), one line per draw.
p = figure();
ppc_data = samples.prior_predictive.log_y_ppc.values[0];
for i in range(prior_check_indices):
    # Pointwise summaries of the draws for this dataset.  Hoisted out of the
    # per-draw loop below: they do not depend on the draw index, so computing
    # them once per dataset (instead of once per draw) avoids redundant work.
    # They currently feed only the commented-out std band.
    mean_ppc = np.mean(ppc_data[:, :, i], axis=1);
    std_ppc = np.std(ppc_data[:, :, i], axis=1);
    for iteration in range(iteration_samples_prior):
        # Prior predictive trajectory for this draw.
        p.line(model_data["time"], ppc_data[iteration, :, i], color = "orange")
        # # Shade std
        # source = bokeh.plotting.ColumnDataSource({
        #     'base':model_data["time"],
        #     'lower':mean_ppc - std_ppc,
        #     'upper':mean_ppc + std_ppc
        # })
        # band = bokeh.models.Band(base='base', lower='lower', upper='upper',
        #                          source=source, fill_alpha=0.5, fill_color = "orange")
        # p.add_layout(band)
        # keff drawn for this iteration, used to transform the measured data.
        keff = samples.prior.keff.values[0][iteration];
        # Experimental data transformed as log(y/y0) + y/keff for comparison.
        y = data["atp"].values[i];
        transformed_y = np.log(y/y[0]) + y/keff;
        p.line(data["time"].values[i], transformed_y, color = "blue")
show(p)
Posterior Predictive Checks¶
# Sanity check of Lambert-W inversion: given y = x*exp(x), recover x with
# scipy's W (principal branch) and with a published closed-form approximation,
# then compare both against the true x.
x = np.arange(1e-6, 1e-3, 1e-6)
y = x * np.exp(x)

# Forward transform, for reference.
p = figure(width = 500, height = 500)
p.line(x, np.exp(x), color = "black", legend_label = "exp(x)")
p.line(x, y, legend_label = "xexp(x)")

# Inversions: exact (scipy) and the analytical approximation from
# https://www.sciencedirect.com/science/article/pii/S1369703X12000277#fig0005
x_recovered = sp.special.lambertw(y).real
inner_a = np.log(2.4*y/(np.log(1 + 2.4*y)))
inner_b = np.log(1+2*y)
x_approximation = 1.45869*np.log(1.2*y/inner_a) - 0.45869*np.log(2*y/inner_b)

p2 = figure(width = 500, height = 500)
p2.line(x, x_recovered, legend_label = "Using Scipy LambertW", alpha = 0.5)
p2.line(x, x_approximation, legend_label = "Using Analytical Approximation", color = "red", alpha = 0.5)
p2.line(x, x, line_dash="dashed", legend_label = "Perfect Recovery", alpha = 0.5)
show(gridplot([[p, p2]]))
# Prepare flattened data for the posterior Stan model and plot it below to
# make sure it has been sliced properly.
M = len(atp_array);
# Concatenate all experiments into single flat vectors (Stan ragged-array pattern).
flat_atp_array = [item for curve in atp_array for item in curve];
flat_time_array = [item for curve in time_array for item in curve];
# Cumulative end index of each experiment within the flat vectors.
# np.cumsum is O(n); the previous repeated sum(size_array[:i+1]) was O(n^2).
end_indices_array = [int(s) for s in np.cumsum(size_array)]
# For stan in posterior predictive checks
model_data = {
    "N": int(len(flat_atp_array)),
    "time": flat_time_array,
    "atp": flat_atp_array,
    "end_indices": end_indices_array,
    "M": M,
    # "ytau": np.array(ytau_array),
    "atp0": np.array(atp0_array),
    "adp0": np.array(adp0_array),
    "p0": np.array(p0_array),
}
# Reference parameter values (used only by the commented-out theoretical-curve
# code that followed in earlier revisions).
KT_assumed = 30; #uM
KD_assumed = 50; #uM
gamma_assumed = 0.2; #ATP/motor/sec
m = 1; #uM
# Overlay every experiment's post-slicing trace (orange) to verify slicing.
p = figure(width = 300, height = 300)
for i in range(M):
    # Each experiment occupies [start, stop) of the flat vectors.
    start = 0 if i == 0 else end_indices_array[i-1]
    stop = end_indices_array[i]
    time = model_data["time"][start:stop]
    atp = model_data["atp"][start:stop]
    p.line(time, atp, color = "orange")
# p = figure(width = 300, height = 300)
for i in range(len(data)):
if i not in remove_indices:
# if i == 26:
# Pre slicing
time = np.array(data["time"][i])
atp = data["atp"][i]
p.line(time, atp, color = "blue", line_dash="dashed")
# p2.line(time, -np.diff(atp), color = "orange")
# p2.line(time, -np.diff(sp.ndimage.gaussian_filter(atp, sigma = 2)), color = "black", line_dash = "dashed")
# break
# for KT_assumed, KD_assumed in zip(KT_assumed_array, KD_assumed_array):
# keff = ( 1 + (( model_data["atp0"][0] + model_data["adp0"][0] )/KD_assumed))/((1/KT_assumed) - (1/KD_assumed));
# ktime = ( 1 + (( model_data["atp0"][0] + model_data["adp0"][0] )/KD_assumed))/(gamma_assumed * m/KT_assumed);
# # yM = model_data["atp"][0];
# # tM = model_data["time"][0];
# # lambert_arg = (yM/keff)*np.exp((-(model_data["time"] - tM)/ktime) + yM/keff);
# # y_theoretical = keff*sp.special.lambertw(lambert_arg).real;
# # p.line(model_data["time"], y_theoretical, color = "black", legend_label = "Theoretical ATP")
# time = np.array(time);
# atp = np.array(atp);
# p.line(-(time[0] - time), (atp - atp[0])/keff + np.log(atp/atp[0]))
show(p)
# Same pre-slicing traces, but with ATP on a log scale.
p = figure(width = 300, height = 300)
for i in range(len(data)):
    if i in remove_indices:
        continue
    time = np.array(data["time"][i])
    atp = data["atp"][i]
    p.line(time, np.log(atp), color = "blue", line_dash="dashed")
show(p)
### Plot experimental curves grouped by initial condition
# Group unique conditions of atp0, adp0, p0
unique_conditions = np.unique(np.array([(atp0, adp0, p0) for atp0, adp0, p0 in zip(atp0_array, adp0_array, p0_array)]), axis = 0)
# To collect rows of plots (linear and log ATP scale).
total_grids = [];
total_grids_log = [];
# Visualise curves for each unique condition
for condition in unique_conditions:
    atp0_indices = np.where(atp0_array == condition[0])[0]
    adp0_indices = np.where(adp0_array == condition[1])[0]
    p0_indices = np.where(p0_array == condition[2])[0]
    # Experiments that share all three initial concentrations.
    curve_indices = np.intersect1d(np.intersect1d(atp0_indices, adp0_indices), p0_indices);
    grid = [];
    grid_log = [];
    p = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM", width=350, height=300)
    p2 = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM", width=350, height=300)
    for i in curve_indices:
        # Slice this experiment out of the flat Stan vectors (minutes on x).
        if i == 0:
            time = np.array(model_data["time"][:end_indices_array[i]])/60
            atp = model_data["atp"][:end_indices_array[i]]
        else:
            time = np.array(model_data["time"][end_indices_array[i-1]:end_indices_array[i]])/60
            atp = model_data["atp"][end_indices_array[i-1]:end_indices_array[i]]
        # ---------------- Plot experimental data ---------------- #
        p.line(time, atp)
        p.circle(time, atp)
        p.xaxis.axis_label = "Time (mins)";
        p.yaxis.axis_label = "ATP (uM)";
        p2.line(time, np.log(atp))
        p2.circle(time, np.log(atp))
        p2.xaxis.axis_label = "Time (mins)";
        p2.yaxis.axis_label = "log[ATP] (uM)";
    grid.append(p)
    grid_log.append(p2)
    total_grids.append(grid);
    total_grids_log.append(grid_log);
# Arrange the condition panels three per row.  Using -1 lets numpy infer the
# row count instead of hard-coding 9, so this keeps working for any panel
# count that is a multiple of 3.
total_grids = np.array(total_grids).flatten();
total_grids = list(total_grids.reshape((-1, 3)))
plot = gridplot(total_grids);
# show(plot)
total_grids_log = np.array(total_grids_log).flatten();
total_grids_log = list(total_grids_log.reshape((-1, 3)))
plot_log = gridplot(total_grids_log);
show(plot_log)
# Load stan file (the transformed Lambert-W model).
sm = cmdstanpy.CmdStanModel(stan_file='./stan_files/transformed_model.stan');
# sm = cmdstanpy.CmdStanModel(stan_file='./stan_files/constant_mentens_model.stan');
# Sample chains (4 chains x 1000 draws; adapt_delta=0.8 is cmdstan's default).
samples = sm.sample(
    data=model_data,
    chains=4,
    iter_sampling=1000,
    show_console= False,
    adapt_delta=0.8
)
# Wrap in ArviZ InferenceData, exposing the generated quantities as the
# posterior-predictive group.
samples = az.from_cmdstanpy(posterior=samples,
                            posterior_predictive=["lambert_argument_generated",
                                                  "atp_generated",
                                                  "atp_exp_generated",
                                                  "keff_generated",
                                                  "ktime_generated",
                                                  "tau_generated",
                                                  "nd_atp_measured",
                                                  "nd_atp_generated",
                                                  "nd_time_generated",
                                                  "square_error"
                                                  ])
# Check diagnostics (ESS, Rhat, divergences, tree depth, E-BFMI).
bebi103.stan.check_all_diagnostics(samples)
chain 1 | | 00:00 Status
chain 2 | | 00:00 Status
chain 3 | | 00:00 Status
chain 4 | | 00:00 Status
tail-ESS for parameter gamma_exp is 170.6537738166718. tail-ESS for parameter delta_K_exp is 187.92123526391006. tail-ESS for parameter inv_C1_exp is 164.80311962153323. ESS or tail-ESS below 100 per chain indicates that expectation values computed from samples are unlikely to be good approximations of the true expectation values. Rhat for parameter gamma_exp is 1.0165399527067156. Rhat for parameter delta_K_exp is 1.017275549536756. Rhat for parameter KD_exp is 1.0136871101599199. Rhat for parameter inv_KD_exp is 1.0136887320253192. Rhat for parameter inv_C1_exp is 1.0169078136892493. Rank-normalized Rhat above 1.01 indicates that the chains very likely have not mixed. 69 of 4000 (1.725%) iterations ended with a divergence. Try running with larger adapt_delta to remove divergences. 0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10. E-BFMI indicated no pathological behavior.
7
# Display the full InferenceData object (notebook rich repr).
samples
# Plot posterior predictive checks for the Lambert-function argument, one
# figure per experiment: posterior-predictive mean (orange line) with a
# +/- one-std band.
# Hoisted out of the loop: the across-draw mean/std are computed over the full
# flattened vector and do not depend on the experiment index, so compute them
# once instead of once per experiment.  (The dead `p = figure()` that was
# immediately shadowed inside the loop has been removed.)
lambert_draws = samples.posterior_predictive["lambert_argument_generated"].values[0]
lambert_mean_full = np.nanmean(lambert_draws, axis = 0)
lambert_std_full = np.nanstd(lambert_draws, axis = 0)
for i in range(model_data["M"]):
    p = figure(title = "Lambert Function Argument")
    # Each experiment occupies [start_index, end_indices_array[i]) of the flat vectors.
    if i == 0:
        start_index = 0;
    else:
        start_index = end_indices_array[i-1];
    time = np.array(data["time"][i])/60
    # ---------------- Transformed ATP Data (log(y/ytau) + y/keff) ---------------- #
    lambert_argument_generated_mean = lambert_mean_full[start_index:end_indices_array[i]];
    lambert_argument_generated_std = lambert_std_full[start_index:end_indices_array[i]];
    # Shade +/- one std around the posterior-predictive mean.
    source = bokeh.plotting.ColumnDataSource({
        'base': time,
        'lower':lambert_argument_generated_mean - lambert_argument_generated_std,
        'upper':lambert_argument_generated_mean + lambert_argument_generated_std
    })
    band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                             source=source, fill_alpha=0.5, fill_color = "orange")
    p.add_layout(band)
    p.line(time, lambert_argument_generated_mean, color = "orange")
    show(p)
# # Plot posterior predictive checks
# # Generated data mean
# atp_generated = samples.posterior_predictive["atp_generated"].values
# # atp_generated = atp_generated[atp_generated != np.nan];
# atp_generated_mean = np.nanmean(atp_generated, axis = (0, 1))
# atp_generated_std = np.nanstd(atp_generated, axis = (0, 1))
# # print(atp_generated_mean)
# # a
# unique_conditions = np.unique(np.array([(atp0, adp0, p0) for atp0, adp0, p0 in zip(atp0_array, adp0_array, p0_array)]), axis = 0)
# # To collect rows of plots
# total_grids = [];
# # Get beta
# # beta = np.mean(samples.posterior["beta"].values.flatten());
# # Visualise curves for each unique condition
# for condition in unique_conditions:
# atp0_indices = np.where(atp0_array == condition[0])[0]
# adp0_indices = np.where(adp0_array == condition[1])[0]
# p0_indices = np.where(p0_array == condition[2])[0]
# curve_indices = np.intersect1d(np.intersect1d(atp0_indices, adp0_indices), p0_indices);
# grid = [];
# for i in curve_indices:
# # for i in range(model_data["M"]):
# p = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM", width=300, height=300)
# if i == 0:
# time = np.array(model_data["time"][:end_indices_array[i]])/60
# atp = model_data["atp"][:end_indices_array[i]]
# atp_generated_mean_sliced = atp_generated_mean[:end_indices_array[i]];
# atp_generated_std_sliced = atp_generated_std[:end_indices_array[i]];
# else:
# time = np.array(model_data["time"][end_indices_array[i-1]:end_indices_array[i]])/60
# atp = model_data["atp"][end_indices_array[i-1]:end_indices_array[i]]
# atp_generated_mean_sliced = atp_generated_mean[end_indices_array[i-1]:end_indices_array[i]];
# atp_generated_std_sliced = atp_generated_std[end_indices_array[i-1]:end_indices_array[i]];
# # ---------------- Transformed ATP Data (log(y/ytau) + y/keff) ---------------- #
# # atp_generated_mean_sliced = np.mean(samples.posterior_predictive["atp_generated"].values[0], axis = 0)[start_index:end_index];
# # atp_generated_std_sliced = np.std(samples.posterior_predictive["atp_generated"].values[0], axis = 0)[start_index:end_index];
# # Shade std
# source = bokeh.plotting.ColumnDataSource({
# 'base': time,
# 'lower':atp_generated_mean_sliced - atp_generated_std_sliced,
# 'upper':atp_generated_mean_sliced + atp_generated_std_sliced
# })
# band = bokeh.models.Band(base='base', lower='lower', upper='upper',
# source=source, fill_alpha=0.5, fill_color = "orange")
# p.add_layout(band)
# p.line(time, atp_generated_mean_sliced, color = "orange")
# # print(atp_generated_mean_sliced)
# # a
# # p.line(time, model_data["atp"][start_index:end_index], color = "blue")
# # p.line(model_data["time"][start_index:end_index], model_data["atp"][start_index:end_index], color = "blue")
# # ---------------- Plot experimental data ---------------- #
# # p.line(time, np.amax(atp)*(atp-np.amin(atp))/(np.amax(atp) - np.amin(atp)), color = "blue", legend_label = "Photobleach Corrected")
# p.line(time, atp, color = "black", legend_label = "Pre-Correction Data", line_dash = "dashed")
# p.xaxis.axis_label = "Time (mins)";
# p.yaxis.axis_label = "ATP (uM)";
# grid.append(p)
# total_grids.append(grid);
# show(gridplot(total_grids))
# Number of experiments after filtering (58 in the captured run).
len(atp0_array)
58
# Mean absolute errors — overlay the Lambert-argument PPC band of every
# experiment on one shared figure.
# NOTE(review): this cell never calls show(p) and the atp_generated /
# atp_exp_generated summaries are not used until the next cell — looks like a
# draft that was superseded below; kept for parity, but with the
# loop-invariant summaries hoisted out of the per-experiment loop.
p = figure()
# Across-draw summaries of the generated data (posterior-predictive mean/std
# over the flattened data vector).
atp_generated_mean = np.nanmean(samples.posterior_predictive["atp_generated"].values[0], axis = 0)
atp_generated_std = np.nanstd(samples.posterior_predictive["atp_generated"].values[0], axis = 0)
atp_exp_generated_mean = np.nanmean(samples.posterior_predictive["atp_exp_generated"].values[0], axis = 0)
atp_exp_generated_std = np.nanstd(samples.posterior_predictive["atp_exp_generated"].values[0], axis = 0)
# Hoisted: the full-vector mean/std of the Lambert argument do not depend on
# the experiment index, so compute them once rather than once per experiment.
lambert_mean_full = np.nanmean(samples.posterior_predictive["lambert_argument_generated"].values[0], axis = 0)
lambert_std_full = np.nanstd(samples.posterior_predictive["lambert_argument_generated"].values[0], axis = 0)
for i in range(model_data["M"]):
    if i == 0:
        start_index = 0;
    else:
        start_index = end_indices_array[i-1];
    time = np.array(data["time"][i])/60
    # ---------------- Transformed ATP Data (log(y/ytau) + y/keff) ---------------- #
    lambert_argument_generated_mean = lambert_mean_full[start_index:end_indices_array[i]];
    lambert_argument_generated_std = lambert_std_full[start_index:end_indices_array[i]];
    # Shade std
    source = bokeh.plotting.ColumnDataSource({
        'base': time,
        'lower':lambert_argument_generated_mean - lambert_argument_generated_std,
        'upper':lambert_argument_generated_mean + lambert_argument_generated_std
    })
    band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                             source=source, fill_alpha=0.5, fill_color = "orange")
    p.add_layout(band)
    p.line(time, lambert_argument_generated_mean, color = "orange")
# Plot posterior predictive checks, grouped by initial condition, and compute
# per-experiment mean absolute errors for the two generative models.
# Mean absolute error accumulators, one slot per experiment.
mean_abs_error_exp = np.zeros(len(atp_array));
mean_abs_error_lambert = np.zeros(len(atp_array));
# Generated data mean/std across posterior draws (flattened data vector).
atp_generated_mean = np.nanmean(samples.posterior_predictive["atp_generated"].values[0], axis = 0)
atp_generated_std = np.nanstd(samples.posterior_predictive["atp_generated"].values[0], axis = 0)
atp_exp_generated_mean = np.nanmean(samples.posterior_predictive["atp_exp_generated"].values[0], axis = 0)
atp_exp_generated_std = np.nanstd(samples.posterior_predictive["atp_exp_generated"].values[0], axis = 0)
# Group unique conditions of atp0, adp0, p0
unique_conditions = np.unique(np.array([(atp0, adp0, p0) for atp0, adp0, p0 in zip(atp0_array, adp0_array, p0_array)]), axis = 0)
# To collect rows of plots
total_grids = [];
# Get beta
# beta = np.mean(samples.posterior["beta"].values.flatten());
# Visualise curves for each unique condition
for condition in unique_conditions:
    atp0_indices = np.where(atp0_array == condition[0])[0]
    adp0_indices = np.where(adp0_array == condition[1])[0]
    p0_indices = np.where(p0_array == condition[2])[0]
    # Experiments that share all three initial concentrations.
    curve_indices = np.intersect1d(np.intersect1d(atp0_indices, adp0_indices), p0_indices);
    grid = [];
    for i in curve_indices:
        # for i in range(model_data["M"]):
        p = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM", width=350, height=300)
        # Slice this experiment out of the flat vectors (time in minutes).
        if i == 0:
            time = np.array(model_data["time"][:end_indices_array[i]])/60
            atp = model_data["atp"][:end_indices_array[i]]
            atp_generated_mean_sliced = atp_generated_mean[:end_indices_array[i]];
            atp_generated_std_sliced = atp_generated_std[:end_indices_array[i]];
            atp_exp_generated_mean_sliced = atp_exp_generated_mean[:end_indices_array[i]];
            atp_exp_generated_std_sliced = atp_exp_generated_std[:end_indices_array[i]];
        else:
            time = np.array(model_data["time"][end_indices_array[i-1]:end_indices_array[i]])/60
            atp = model_data["atp"][end_indices_array[i-1]:end_indices_array[i]]
            atp_generated_mean_sliced = atp_generated_mean[end_indices_array[i-1]:end_indices_array[i]];
            atp_generated_std_sliced = atp_generated_std[end_indices_array[i-1]:end_indices_array[i]];
            atp_exp_generated_mean_sliced = atp_exp_generated_mean[end_indices_array[i-1]:end_indices_array[i]];
            atp_exp_generated_std_sliced = atp_exp_generated_std[end_indices_array[i-1]:end_indices_array[i]];
        # Add generated atp according to lambert function (mean +/- std band).
        source = bokeh.plotting.ColumnDataSource({
            'base': time,
            'lower':atp_generated_mean_sliced - atp_generated_std_sliced,
            'upper':atp_generated_mean_sliced + atp_generated_std_sliced
        })
        band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                                 source=source, fill_alpha=0.1, fill_color = "orange")
        p.add_layout(band)
        p.line(time, atp_generated_mean_sliced, color = "orange", legend_label = "Lambert")
        # Add generated atp according to exponential function (mean +/- std band).
        source = bokeh.plotting.ColumnDataSource({
            'base': time,
            'lower':atp_exp_generated_mean_sliced - atp_exp_generated_std_sliced,
            'upper':atp_exp_generated_mean_sliced + atp_exp_generated_std_sliced
        })
        band2 = bokeh.models.Band(base='base', lower='lower', upper='upper',
                                  source=source, fill_alpha=0.1, fill_color = "magenta")
        p.add_layout(band2)
        p.line(time, atp_exp_generated_mean_sliced, color = "magenta", legend_label = "Exponential", line_dash = "dashed")
        # ---------------- Plot experimental data ---------------- #
        p.circle(time, atp, legend_label = "Data")
        p.xaxis.axis_label = "Time (mins)";
        p.yaxis.axis_label = "ATP (uM)";
        grid.append(p)
        print(i)
        # Mean absolute error of each model's posterior-predictive mean vs data.
        mean_abs_error_exp[i] = np.sum(np.abs(atp_array[i] - atp_exp_generated_mean_sliced))/len(atp_array[i])
        mean_abs_error_lambert[i] = np.sum(np.abs(atp_array[i] - atp_generated_mean_sliced))/len(atp_array[i])
    # Show this condition's row of panels.
    show(gridplot([grid]))
    total_grids.append(grid);
    # break
# show(gridplot(total_grids))
12 17 42
13 18 43
14 19 46
15 20 44
16
45
47 53
48 55
56
51 57
49 52
50 54
2 6
3 7
4 8
21 28
22 29 35
23 30
24 27 40
25 31 36
34 37 41
32 38
33 39
26
5 10
0 9
1 11
# Inspect the per-experiment Lambert-model MAEs.
# NOTE(review): most entries are zero in the captured output — confirm the
# MAE assignments actually ran for every experiment.
mean_abs_error_lambert
array([0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.56630789, 0. , 0. ,
0. , 0. , 1.38010369, 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1.02699593, 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. ])
# ECDFs of the per-experiment mean absolute error for the two generative
# models, shown side by side (Lambert left, exponential right).
p = figure(title = "Mean Absolute Error across Experiments (Exponential)", width = 400, height = 300)
p2 = figure(title = "Mean Absolute Error across Experiments (Lambert)", width = 400, height = 300)
for panel in (p, p2):
    panel.xaxis.axis_label = "Mean Absolute Error (uM)"
    panel.yaxis.axis_label = "Fraction of Experiments"
iqplot.ecdf(mean_abs_error_exp, p = p, color = "magenta")
iqplot.ecdf(mean_abs_error_lambert, p = p2, color = "darkorange")
show(gridplot([[p2, p]]))
# Error statistics
# Root-mean-square error per data point across posterior draws.
# NOTE(review): despite the "_mean" name this is sqrt(mean(square_error)),
# i.e. an RMSE; square_error_std is computed but unused here.
square_error_mean = np.sqrt(np.mean(samples.posterior_predictive["square_error"].values[0], axis = 0))
square_error_std = np.std(samples.posterior_predictive["square_error"].values[0], axis = 0)
show(iqplot.ecdf(square_error_mean))
# Plot non dimensionalised posterior predictive checks, grouped by condition.
# Generated data mean/std across posterior draws (flattened data vector).
nd_atp_generated_mean = np.mean(samples.posterior_predictive["nd_atp_generated"].values[0], axis = 0)
nd_atp_generated_std = np.std(samples.posterior_predictive["nd_atp_generated"].values[0], axis = 0)
# NOTE(review): the measured mean/std and the time std below are computed but
# only nd_time_generated_mean is used in the plots.
nd_atp_measured_mean = np.mean(samples.posterior_predictive["nd_atp_measured"].values[0], axis = 0)
nd_atp_measured_std = np.std(samples.posterior_predictive["nd_atp_measured"].values[0], axis = 0)
nd_time_generated_mean = np.mean(samples.posterior_predictive["nd_time_generated"].values[0], axis = 0)
nd_time_generated_std = np.std(samples.posterior_predictive["nd_time_generated"].values[0], axis = 0)
# Group unique conditions of atp0, adp0, p0
unique_conditions = np.unique(np.array([(atp0, adp0, p0) for atp0, adp0, p0 in zip(atp0_array, adp0_array, p0_array)]), axis = 0)
# To collect rows of plots
nd_total_grids = [];
# Visualise curves for each unique condition
for condition in unique_conditions:
    atp0_indices = np.where(atp0_array == condition[0])[0]
    adp0_indices = np.where(adp0_array == condition[1])[0]
    p0_indices = np.where(p0_array == condition[2])[0]
    # Experiments that share all three initial concentrations.
    curve_indices = np.intersect1d(np.intersect1d(atp0_indices, adp0_indices), p0_indices);
    grid = [];
    for i in curve_indices:
        # for i in range(model_data["M"]):
        p = figure(title = f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM", width=300, height=300)
        # Slice this experiment out of the flat vectors (nd time scaled by 60).
        if i == 0:
            time = nd_time_generated_mean[:end_indices_array[i]]/60
            nd_atp_measured_mean_sliced = nd_atp_measured_mean[:end_indices_array[i]];
            nd_atp_generated_mean_sliced = nd_atp_generated_mean[:end_indices_array[i]];
            nd_atp_generated_std_sliced = nd_atp_generated_std[:end_indices_array[i]];
        else:
            time = nd_time_generated_mean[end_indices_array[i-1]:end_indices_array[i]]/60
            nd_atp_measured_mean_sliced = nd_atp_measured_mean[end_indices_array[i-1]:end_indices_array[i]];
            nd_atp_generated_mean_sliced = nd_atp_generated_mean[end_indices_array[i-1]:end_indices_array[i]];
            nd_atp_generated_std_sliced = nd_atp_generated_std[end_indices_array[i-1]:end_indices_array[i]];
        # Shade +/- one std around the generated mean.
        source = bokeh.plotting.ColumnDataSource({
            'base': time,
            'lower':nd_atp_generated_mean_sliced - nd_atp_generated_std_sliced,
            'upper':nd_atp_generated_mean_sliced + nd_atp_generated_std_sliced
        })
        band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                                 source=source, fill_alpha=0.5, fill_color = "orange")
        p.add_layout(band)
        p.line(time, nd_atp_generated_mean_sliced, color = "orange")
        # ---------------- Plot experimental data ---------------- #
        # p.line(time, nd_atp_measured_mean_sliced + np.log(nd_atp_measured_mean_sliced), color = "blue")
        # p.xaxis.axis_label = "Time (mins)";
        # p.yaxis.axis_label = "ATP (uM)";
        grid.append(p)
    nd_total_grids.append(grid);
show(gridplot(nd_total_grids))
# # For hierarchical models
# parameters = (
# [f"tau[{i}]" for i in samples.posterior.tau_dim_0.values]
# )
# print(parameters)
# bokeh.io.show(
# bebi103.viz.parcoord(
# samples,
# parameters=parameters,
# xtick_label_orientation="vertical",
# transformation="minmax",
# )
# )
# parameters = (
# [f"sigma[{i}]" for i in samples.posterior.tau_dim_0.values]
# )
# print(parameters)
# bokeh.io.show(
# bebi103.viz.parcoord(
# samples,
# parameters=parameters,
# xtick_label_orientation="vertical",
# transformation="minmax",
# )
# )
# Parallel-coordinate plots of the per-experiment keff and ktime posterior
# samples, min-max scaled so all dimensions share one axis.
# NOTE(review): the dimension labels are taken from tau_dim_0, assuming
# keff/ktime share tau's per-experiment dimension — confirm vs the Stan model.
parameters = (
    [f"keff[{i}]" for i in samples.posterior.tau_dim_0.values]
)
print(parameters)
bokeh.io.show(
    bebi103.viz.parcoord(
        samples,
        parameters=parameters,
        xtick_label_orientation="vertical",
        transformation="minmax",
    )
)
parameters = (
    [f"ktime[{i}]" for i in samples.posterior.tau_dim_0.values]
)
print(parameters)
bokeh.io.show(
    bebi103.viz.parcoord(
        samples,
        parameters=parameters,
        xtick_label_orientation="vertical",
        transformation="minmax",
    )
)
print("KT:", np.mean(samples.posterior.KT.values.flatten()))
print("KD:", np.mean(samples.posterior.KT.values.flatten()) + np.mean(samples.posterior.delta_K.values.flatten()))
print("sigma:", np.mean(samples.posterior.sigma.values.flatten()))
print("gamma:", np.mean(samples.posterior.gamma.values.flatten()))
KT: 33.80827315 KD: 35.812018285 sigma: 13.3434554 gamma: 0.1995004415
p1 = figure(title = "Cummulative Distribution of KT", height = 300, width = 300);
iqplot.ecdf(samples.posterior.KT.values.flatten(), p = p1)
p2 = figure(title = "Cummulative Distribution of KD", height = 300, width = 300);
iqplot.ecdf(samples.posterior.KT.values.flatten() + samples.posterior.delta_K.values.flatten(), p = p2)
p3 = figure(title = "Cummulative Distribution of Gamma", height = 300, width = 300);
iqplot.ecdf(samples.posterior.gamma.values.flatten(), p = p3)
p4 = figure(title = "Cummulative Distribution of Sigma", height = 300, width = 300);
iqplot.ecdf(samples.posterior.sigma.values.flatten(), p = p4)
show(gridplot([[p1, p2], [p3, p4]]))
# Corner plot to visualise the joint posterior of KT and gamma, colored by
# chain. BUG FIX: divergence_color must be a valid CSS color string —
# passing "none" makes bokeh emit E-1001 (BAD_COLUMN_NAME) and drop the
# divergence glyphs. Use "black" (as in earlier corner plots) instead.
bokeh.io.show(
    bebi103.viz.corner(
        samples,
        xtick_label_orientation=np.pi / 4,
        parameters=["KT", "gamma"],
        color_by_chain=True,
        divergence_color="black",
    )
)
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : fill_color='none' [no close matches], hatch_color='none' [no close matches], line_color='none' [no close matches] {renderer: GlyphRenderer(id='p95257', ...)}
# Inspect the posterior group (notebook cell display).
samples.posterior
# Posterior-mean KT and KD recovered from their inverse parameterizations.
np.mean(1/samples.posterior.inv_KT.values.flatten())
np.mean(1/samples.posterior.inv_KD.values.flatten())
# Posterior-mean hydrolysis rate gamma.
np.mean(samples.posterior.gamma.values.flatten())
# ECDFs of generated quantities and posterior parameters.
show(iqplot.ecdf(samples.posterior_predictive.keff_generated.values.flatten(), title="keff_generated"))
show(iqplot.ecdf(samples.posterior_predictive.ktime_generated.values.flatten(), title="ktime_generated"))
# C1 is sampled as its inverse, so invert before plotting.
show(iqplot.ecdf(1/samples.posterior.inv_C1.values.flatten(), title="C1"))
# BUG FIX: KT and delta_K are sampled directly in this model (they are used
# directly in the summaries above), so the spurious 1/ reciprocals are
# removed; the duplicated C1 ECDF is also dropped.
show(iqplot.ecdf(samples.posterior.KT.values.flatten(), title="KT"))
show(iqplot.ecdf(samples.posterior.delta_K.values.flatten(), title="delta K"))
show(iqplot.ecdf(samples.posterior.gamma.values.flatten(), title="gamma"))
# Time delays, converted from seconds to minutes.
show(iqplot.ecdf(samples.posterior_predictive.tau_generated.values.flatten()/60, title="tau"))
samples.posterior_predictive.keff_generated.values.shape
# Average generated keff/ktime over chains and draws (axes 0 and 1).
mean_keff_generated = np.nanmean(samples.posterior_predictive.keff_generated.values, axis=(0, 1))
mean_ktime_generated = np.nanmean(samples.posterior_predictive.ktime_generated.values, axis=(0, 1))
p = figure(title="Keff vs ATP0 + ADP0", width=300, height=300)
p.circle(np.array(atp0_array) + np.array(adp0_array), mean_keff_generated)
show(p)
A Hierarchical Model¶
In the above analysis, the posterior predictive checks do not seem satisfactory.
We note that our dataset is an assay of various repeats, some at different initial conditions. Given the high variability of biological systems, it makes sense to consider a hierarchical model instead; we implement one in this section.
# Prepare data and plot to make sure it has been sliced properly
M = len(atp_array)
# Concatenate every curve into single flat vectors for Stan.
flat_atp_array = [pt for curve in atp_array for pt in curve]
flat_time_array = [pt for curve in time_array for pt in curve]
# Cumulative end index of each curve inside the flat vectors.
end_indices_array = [int(sum(size_array[:i + 1])) for i in range(len(size_array))]
# Input dictionary for Stan (also used in posterior predictive checks).
model_data = {
    "N": int(len(flat_atp_array)),
    "time": flat_time_array,
    "atp": flat_atp_array,
    "end_indices": end_indices_array,
    "M": M,
    "atp0": np.array(atp0_array),
    "adp0": np.array(adp0_array),
    "p0": np.array(p0_array),
}
# Sanity-check the slicing: plot the curves recovered from the flat vectors
# (orange) over the original pre-slicing data (blue, dashed). Indentation of
# this cell was lost in export and is reconstructed here; dead commented-out
# overlays were removed.
p = figure()
# Reference constants for the theoretical overlays (kept for later use).
KT_assumed = 30  # uM
KD_assumed = 50  # uM
gamma_assumed = 0.2  # ATP/motor/sec
m = 1  # uM
for i in range(M):
    # Recover curve i from the flat vectors via the cumulative end indices.
    if i == 0:
        time = model_data["time"][:end_indices_array[i]]
        atp = model_data["atp"][:end_indices_array[i]]
    else:
        time = model_data["time"][end_indices_array[i-1]:end_indices_array[i]]
        atp = model_data["atp"][end_indices_array[i-1]:end_indices_array[i]]
    p.line(time, atp, color="orange")
for i in range(len(data)):
    if i not in remove_indices:
        # Pre-slicing curves straight from the dataframe.
        time = np.array(data["time"][i])
        atp = data["atp"][i]
        p.line(time, atp, color="blue", line_dash="dashed")
        print(i)
show(p)
# Compile the hierarchical Stan model.
sm = cmdstanpy.CmdStanModel(stan_file='./stan_files/hierarchical_model.stan')
# Draw posterior samples (single chain, high adapt_delta to curb divergences).
samples = sm.sample(
    data=model_data,
    chains=1,
    iter_sampling=1000,
    show_console=True,
    adapt_delta=0.99,
)
# Convert to ArviZ InferenceData, tagging generated quantities as
# posterior-predictive variables.
samples = az.from_cmdstanpy(
    posterior=samples,
    posterior_predictive=[
        "lambert_argument_generated",
        "atp_generated",
        "atp_exp_generated",
        "keff_generated",
        "ktime_generated",
        "tau_generated",
    ],
)
# Check diagnostics
bebi103.stan.check_all_diagnostics(samples)
samples
# Posterior predictive checks: for every unique (atp0, adp0, p0) condition,
# plot each measured curve against the generated mean ± 1 sd bands for both
# likelihoods. Indentation of this cell was lost in export and is
# reconstructed here.
# Mean/std of the generated curves over draws (chain 0 only).
atp_generated_mean = np.mean(samples.posterior_predictive["atp_generated"].values[0], axis=0)
atp_generated_std = np.std(samples.posterior_predictive["atp_generated"].values[0], axis=0)
atp_exp_generated_mean = np.mean(samples.posterior_predictive["atp_exp_generated"].values[0], axis=0)
atp_exp_generated_std = np.std(samples.posterior_predictive["atp_exp_generated"].values[0], axis=0)
# Group unique conditions of (atp0, adp0, p0).
unique_conditions = np.unique(
    np.array([(atp0, adp0, p0) for atp0, adp0, p0 in zip(atp0_array, adp0_array, p0_array)]),
    axis=0,
)
total_grids = []  # one row of plots per unique condition
for condition in unique_conditions:
    # Indices of curves measured under this condition.
    atp0_indices = np.where(atp0_array == condition[0])[0]
    adp0_indices = np.where(adp0_array == condition[1])[0]
    p0_indices = np.where(p0_array == condition[2])[0]
    curve_indices = np.intersect1d(np.intersect1d(atp0_indices, adp0_indices), p0_indices)
    grid = []
    for i in curve_indices:
        p = figure(
            title=f"ATP0 = {condition[0]}uM, ADP0 = {condition[1]}uM, P0 = {condition[2]}uM",
            width=300,
            height=300,
        )
        # Slice curve i out of the flat arrays; time converted to minutes.
        if i == 0:
            time = np.array(model_data["time"][:end_indices_array[i]])/60
            atp = model_data["atp"][:end_indices_array[i]]
            atp_generated_mean_sliced = atp_generated_mean[:end_indices_array[i]]
            atp_generated_std_sliced = atp_generated_std[:end_indices_array[i]]
            atp_exp_generated_mean_sliced = atp_exp_generated_mean[:end_indices_array[i]]
            atp_exp_generated_std_sliced = atp_exp_generated_std[:end_indices_array[i]]
        else:
            time = np.array(model_data["time"][end_indices_array[i-1]:end_indices_array[i]])/60
            atp = model_data["atp"][end_indices_array[i-1]:end_indices_array[i]]
            atp_generated_mean_sliced = atp_generated_mean[end_indices_array[i-1]:end_indices_array[i]]
            atp_generated_std_sliced = atp_generated_std[end_indices_array[i-1]:end_indices_array[i]]
            atp_exp_generated_mean_sliced = atp_exp_generated_mean[end_indices_array[i-1]:end_indices_array[i]]
            atp_exp_generated_std_sliced = atp_exp_generated_std[end_indices_array[i-1]:end_indices_array[i]]
        # ±1 sd band and mean for the Lambert-function likelihood.
        source = bokeh.plotting.ColumnDataSource({
            'base': time,
            'lower': atp_generated_mean_sliced - atp_generated_std_sliced,
            'upper': atp_generated_mean_sliced + atp_generated_std_sliced,
        })
        band = bokeh.models.Band(base='base', lower='lower', upper='upper',
                                 source=source, fill_alpha=0.25, fill_color="orange")
        p.add_layout(band)
        p.line(time, atp_generated_mean_sliced, color="orange", legend_label="Lambert Likelihood")
        # ±1 sd band and mean for the exponential likelihood.
        source = bokeh.plotting.ColumnDataSource({
            'base': time,
            'lower': atp_exp_generated_mean_sliced - atp_exp_generated_std_sliced,
            'upper': atp_exp_generated_mean_sliced + atp_exp_generated_std_sliced,
        })
        band2 = bokeh.models.Band(base='base', lower='lower', upper='upper',
                                  source=source, fill_alpha=0.25, fill_color="magenta")
        p.add_layout(band2)
        p.line(time, atp_exp_generated_mean_sliced, color="magenta", legend_label="Exponential Likelihood")
        # Experimental data.
        p.line(time, atp, color="black", legend_label="Pre-Correction Data", line_dash="dashed")
        p.xaxis.axis_label = "Time (mins)"
        p.yaxis.axis_label = "ATP (uM)"
        grid.append(p)
    total_grids.append(grid)
show(gridplot(total_grids))
# Corner plot of the sigma posterior, colored by chain.
bokeh.io.show(
    bebi103.viz.corner(
        samples,
        xtick_label_orientation=np.pi / 4,
        parameters=["sigma"],
        color_by_chain=True,
    )
)
# Posterior mean of the per-curve beta parameters (notebook display).
np.mean(samples.posterior.beta_array.values.flatten())
Using Lambert Function¶
# Prior predictive draws overlaid on the data, using the Lambert-W solution.
# Indentation of this cell was lost in export and is reconstructed here.
p1 = figure(title="ATP Curves")
n_draws = 1
# Priors on the model constants.
C1_array = np.random.normal(loc=1500, scale=1, size=n_draws)
C2_array = np.random.normal(loc=100, scale=10, size=n_draws)
KD_array = np.random.normal(loc=50, scale=10, size=n_draws)
KP_array = np.random.normal(loc=50, scale=10, size=n_draws)
# For the exponential-limit curve.
C_array = np.random.uniform(0, 5000, size=n_draws)
sigma_array = np.random.normal(loc=5, scale=2, size=n_draws)
theoretical_atp_list = []
likelihoods_list = []
tau_theoretical_array = np.zeros(n_draws)
C2_theoretical_array = np.zeros(n_draws)
for i in range(n_draws):
    ## PRIORS
    C1 = C1_array[i]
    C2 = C2_array[i]
    KD = KD_array[i]
    KP = KP_array[i]
    sigma = sigma_array[i]
    C = C_array[i]
    theoretical_atp = []
    for time, atp, atp0, adp0, p0 in zip(data["time"], data["atp"], data["atp0"], data["adp0"], data["p0"]):
        ## CALCULATE REMAINING PRIOR
        # Theoretical time delay from the priors and initial conditions.
        # NOTE(review): overwritten on every curve — only the last curve's
        # tau survives in tau_theoretical_array[i]; confirm intended.
        tau = tau_theoretical(atp, atp0, adp0, p0, C1, C2, KD, KP)
        tau_theoretical_array[i] = tau
        ## LIKELIHOOD
        # Draw data out of the likelihood for this set of prior params.
        theoretical_atp = y_theoretical(time, atp0, adp0, p0, C1, C2, KD, KP, tau).real
        theoretical_atp_list.append(theoretical_atp)
        likelihoods = np.zeros(len(atp))
        for j, datum in enumerate(atp):
            # BUG FIX: store each per-point likelihood instead of rebinding
            # the whole array to a scalar on every iteration.
            likelihoods[j] = sp.stats.norm.pdf(datum, loc=theoretical_atp[j], scale=sigma)
        likelihoods_list.append(likelihoods)
        p1.line(time, atp, color="blue")
        p1.line(time, theoretical_atp, color="orange")
show(p1)
# Theoretical time delays in minutes (notebook display).
tau_theoretical_array/60
# Plot log of non-dimensionalised ATP against scaled time in the limit where
# KD and KP are very large (so keff = C1 and ktime = C2).
# BUG FIX: p2 was referenced without ever being created (only a commented-out
# figure existed upstream), which raises NameError.
p2 = figure(title="Non-dimensionalised log(ATP) vs Scaled Time")
for time, atp, atp0, adp0, p0 in zip(data["time"], data["atp"], data["atp0"], data["adp0"], data["p0"]):
    keff = C1
    ktime = C2
    y_cap = atp/keff
    t_cap = time/ktime
    p2.line(t_cap, np.log(y_cap))
show(p2)
# # Flatten lists for plotting since they are inhomogenous
# flattened_theoretical_atp_list = np.array([item for curve in theoretical_atp_list for item in curve]);
# flattened_atp_list = np.array([item for curve in data["atp"] for item in curve]);
# # # iqplot.ecdf(C1, p = p)
# p = figure()
# bebi103.viz.predictive_ecdf(flattened_theoretical_atp_list, x_axis_label="spindle length (µm)", p = p)
# iqplot.ecdf(flattened_atp_list, q = "experimental data", p = p, style = "dots")
# show(p)
The data seems to be captured by the ecdfs, but note that the priors are noninformative and quite broad. We can improve upon this later.
It is also worth checking whether a simplified model can still capture the observed behavior:
### Prior predictive checks using numpy
def y_theoretical_simplified(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
    """Exponential limit of the theoretical ATP curve.

    When keff is much larger than the ATP concentration, the Lambert-W
    solution reduces to a decaying exponential: y = y0 * exp(-t/ktime).
    C1 is unused in this limit but kept for signature parity with
    y_theoretical. (Body indentation, lost in export, reconstructed.)
    """
    ktime = C2 * (1 + (atp0 + adp0) / KD + (atp0 + p0) / KP)
    return atp0 * np.exp(-(time + tau) / ktime)
# Draw 50 parameter sets from broad uniform priors and compare the resulting
# predictive ECDFs (exponential-limit model) against the experimental data.
theoretical_curves_list = []
likelihoods_list = []
for _ in range(50):
    C1 = np.random.uniform(low=10, high=1000)
    C2 = np.random.uniform(low=10, high=1000)
    KD = np.random.uniform(low=10, high=1000)
    KP = np.random.uniform(low=10, high=1000)
    tau = np.random.uniform(low=0, high=1000)
    # Draw data sets out of the likelihood for this set of prior params.
    theoretical_atp = np.array([
        y_theoretical_simplified(time, atp0, adp0, p0, C1, C2, KD, KP, tau)
        for time, atp0, adp0, p0 in zip(data["time"], data["atp0"], data["adp0"], data["p0"])
    ]).real
    likelihoods = sp.stats.norm.pdf(data["atp"], loc=theoretical_atp, scale=1)
    theoretical_curves_list.append(theoretical_atp)
    likelihoods_list.append(likelihoods)
p = figure()
bebi103.viz.predictive_ecdf(np.array(theoretical_curves_list), x_axis_label="spindle length (µm)", p=p)
iqplot.ecdf(np.array(data["atp"]), q="experimental data", p=p, style="dots")
show(p)
# ## Visualise spread of K_eff, K_time for the data
# keff_array = []
# ktime_array = []
# KT = 50;
# KD = 50;
# KP = 100;
# KT_array = np.arange(50, 550, 100)
# KD_array = np.arange(50, 550, 100)
# KP_array = np.arange(50, 550, 100)
# print(KT_array)
# parameters = []
# for KT in KT_array:
# for KD in KD_array:
# for KP in KP_array:
# for i in range(data["N"]):
# parameters.append([KT, KD, KP])
# atp0 = data["atp0"][i]
# adp0 = data["adp0"][i]
# p0 = data["p0"][i]
# # Keff calculation
# keff_array.append(KT * ( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP ) / ( (1/KT) - (1/KD) - (1/KP) ));
# # Ktime calculation
# ktime_array.append(((1/KT) - (1/KD) - (1/KP)) * keff_array[i] / ( gamma * m ));
Predictive Prior Checks on "Well - Behaved" Dataset¶
When plotting the non-dimensionalised data, there is a "well behaved" initial part, after which later timesteps deviate from behavior.
Below is a plot generated from the advanced fitting notebook:
# Non-dimensionalise every ATP curve with K_T and K_eff and plot the
# transformed data against scaled time. Indentation of this cell was lost in
# export and is reconstructed here; the dead commented-out fitting section
# was removed (fitting_information stays, ready to be repopulated).
K_T = 28.1
K_D = 34.6  # uM
K_P = 9000  # uM
K_inv = (1/K_D) + (1/K_P)  # uM^-1
K_eff_func = lambda y_o: (1 + (y_o*K_inv))/((1/K_T) - K_inv)  # Units uM
fitting_information = []
m = 1  # motor concentration (uM)
# For an ideal curve
gamma = 1  # assume hydrolysis rate = 1 ATP/motor/s (s^-1)
# Plotting properties
line_size = 2
size = 600
plot1 = figure(title="Non dimensionalized ΑΤP vs Scaled Time (K_T and K_eff)", width=size, height=size)
plot2 = figure(title="y - y0 + ln(y/y0) non-dimensionalised (K_T) vs Scaled Time", width=size, height=size)
plot3 = figure(title="y - y0 + ln(y/y0) non-dimensionalised (K_eff) vs Scaled Time", width=size, height=size)
plot4 = figure(title="ATP vs Time", width=size, height=size)
plot5 = figure(title="y - y0 + ln(y/y0) non-dimensionalised (K_eff) vs Time (s)", width=size, height=size)
plot6 = figure(title="log(ATP) vs Time", width=size, height=size)
n1, n2 = [5, 20]  # datapoint window used by the (removed) fitting step
for i in range(len(ATP_curve_list)):
    #### --------- Reject Bad Curves --------- ####
    # Skip empty curves and curves whose initial probe reading is too high.
    if len(ATP_curve_list[i]) != 0:
        if ATP_curve_list[i][0] < 2000:
            #### --------- Initialisations --------- ####
            y_o = ATP_conc_list[i]  # initial ATP conc (experimental plan)
            print('Initial ATP conc: ', y_o)
            ADP_o = ADP_conc_list[i]
            P_o = P_conc_list[i]
            print('P conc: ', P_o)
            time = np.array(times_list[i])
            # Color by initial ATP concentration (viridis, scaled to 800 uM).
            color = plt.cm.viridis(y_o/800)
            color = "#{:02x}{:02x}{:02x}".format(int(color[0] * 255),
                                                 int(color[1] * 255),
                                                 int(color[2] * 255))
            #### --------- Without K_eff --------- ####
            A = ATP_curve_list[i]
            idealised_time_constant_K_T = K_T/(gamma*m)
            time_nd_K_T = time/idealised_time_constant_K_T
            A_nd = np.array(ATP_curve_list[i])/K_T
            #### --------- With K_eff --------- ####
            K_eff = K_eff_func(y_o)
            idealised_time_constant_K_eff = (K_T*(1 + (y_o*K_inv)))/(gamma*m)
            time_nd_K_eff = time/idealised_time_constant_K_eff
            A_nd_K_eff = np.array(ATP_curve_list[i])/K_eff
            #### --------- Plots --------- ####
            plot1.line(time_nd_K_T, A_nd + np.log(A_nd), line_color=color, line_dash="dotted")
            plot1.circle(time_nd_K_T, A_nd + np.log(A_nd), size=1, fill_color="white", line_color=color)
            plot1.line(time_nd_K_eff, A_nd_K_eff - y_o/K_eff + np.log(A_nd_K_eff/(y_o/K_eff)),
                       line_color=color, line_dash="dotted")
            plot1.circle(time_nd_K_eff, A_nd_K_eff - y_o/K_eff + np.log(A_nd_K_eff/(y_o/K_eff)),
                         size=1, fill_color="white", line_color=color)
            # Ideal curve (45 degree line)
            plot1.line(np.arange(0, np.amax(time_nd_K_eff)), -np.arange(0, np.amax(time_nd_K_eff)),
                       line_color="black", line_dash="dotted")
            plot2.line(time_nd_K_T, A_nd - y_o/K_T + np.log(A_nd/(y_o/K_T)),
                       line_color=color, line_dash="dotted")
            plot2.circle(time_nd_K_T, A_nd - y_o/K_T + np.log(A_nd/(y_o/K_T)),
                         size=1, fill_color="white", line_color=color)
            # Transformed data against K_eff.
            y3 = A_nd_K_eff - y_o/K_eff + np.log(A_nd_K_eff/(y_o/K_eff))
            plot3.line(time_nd_K_eff, y3, line_color=color, line_dash="dotted")
            plot3.circle(time_nd_K_eff, y3, size=line_size, fill_color="white", line_color=color)
            plot4.line(time, A, line_color=color, line_dash="dotted", line_width=line_size)
            plot4.circle(time, A, size=line_size, fill_color="white", line_color=color)
            plot5.line(time, y3, line_color=color, line_dash="dotted")
            plot5.circle(time, y3, size=line_size, fill_color="white", line_color=color)
            plot6.line(time, np.log(A), line_color=color, line_dash="dotted")
            plot6.circle(time, np.log(A), size=line_size, fill_color="white", line_color=color)
# Add color bar
mapper = linear_cmap(field_name='color', palette=Viridis256, low=0, high=800)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0, 0))
plot1.add_layout(color_bar, 'right')
plot2.add_layout(color_bar, 'right')
plot3.add_layout(color_bar, 'right')
plot4.add_layout(color_bar, 'right')
plot5.add_layout(color_bar, 'right')
# Labels
plot1.xaxis.axis_label = "Scaled Time"
plot1.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_T/K_eff"
plot2.xaxis.axis_label = "Scaled Time"
plot2.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_T"
plot3.xaxis.axis_label = "Scaled Time"
plot3.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_eff"
plot4.xaxis.axis_label = "Time (s)"
plot4.yaxis.axis_label = "ATP (uM)"
plot5.xaxis.axis_label = "Time (s)"
plot5.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_eff"
# Ideal line
plot3.line(np.arange(0, np.amax(time_nd_K_eff)), -np.arange(0, np.amax(time_nd_K_eff)), line_color="black")
# Arrange the plots in a grid layout
grid = gridplot([[plot4, plot6, plot2], [plot5, plot3]])
# Show the grid layout
show(grid)
fitting_information = pd.DataFrame(fitting_information)
For all curves, the linear regime definitely lasts until ~7000 seconds (~2 hours). Let's visualise just this portion:
# Print the floor (minimum) value of each ATP curve.
for curve in ATP_curve_list:
    print(min(curve))
K_T = 28.1;
K_D = 34.6; #uM
K_P = 9000; #uM
K_inv = (1/K_D) + (1/K_P); #uM-1
K_eff_func = lambda y_o: (1 + (y_o*K_inv))/((1/K_T) - K_inv) # Units uM
fitting_information = [];
# Pandas dataframe
m = 1; #motor concentration (uM)
# For an ideal curve
gamma = 1; # assume hydrolysis rate = 1 ATP/motor/s (s-1)
# Plotting properties
line_size = 2;
size = 600;
plot1 = figure(title="Non dimensionalized ΑΤP vs Scaled Time (K_T and K_eff)", width=size, height=size)
plot2 = figure(title="y - y0 + ln(y/y0) non-dimensionalised (K_T) vs Scaled Time", width=size, height=size)
plot3 = figure(title="y - y0 + ln(y/y0) non-dimensionalised (K_eff) vs Scaled Time", width=size, height=size)
plot4 = figure(title="ATP vs Time", width=size, height=size)
plot5 = figure(title="y - y0 + ln(y/y0) non-dimensionalised (K_eff) vs Time (s)", width=size, height=size)
plot6 = figure(title="log(ATP) vs Time", width=size, height=size)
n1, n2 = [5, 20]; # number of datapoints
# Repeat the non-dimensionalisation plots, cutting every curve at an ATP
# floor of 70 uM (beyond which the non-dimensionalised curves deviate from a
# straight line). Indentation was lost in export and is reconstructed here.
# BUG FIX: j_list now gets an entry for EVERY curve (rejected curves keep
# their full length) so that j_list[i] stays aligned with ATP_curve_list[i]
# when it is indexed by absolute curve index downstream.
j_list = []  # per-curve cut index at the ATP floor
for i in range(len(ATP_curve_list)):
    #### --------- Reject Bad Curves --------- ####
    # Reject empty curves and curves whose initial probe reading is too high.
    if len(ATP_curve_list[i]) == 0 or ATP_curve_list[i][0] >= 2000:
        j_list.append(len(ATP_curve_list[i]))
        continue
    #### --------- Initialisations --------- ####
    y_o = ATP_conc_list[i]  # initial ATP concentration (experimental plan)
    print('Initial ATP conc: ', y_o)
    # Initial ADP Concentration
    ADP_o = ADP_conc_list[i]
    P_o = P_conc_list[i]
    print('P conc: ', P_o)
    # Color by initial ATP concentration (viridis, scaled to 800 uM).
    color = plt.cm.viridis(y_o/800)
    color = "#{:02x}{:02x}{:02x}".format(int(color[0] * 255),
                                         int(color[1] * 255),
                                         int(color[2] * 255))
    # Cut index: first datapoint below the 70 uM ATP floor.
    j = np.where(np.array(ATP_curve_list[i]) < 70)[-1]
    # Robustness: a curve that never crosses the floor keeps its full length
    # (the bare j[0] would raise IndexError on an empty result).
    j = j[0] if len(j) else len(ATP_curve_list[i])
    if j == 0:
        j = 10
    j_list.append(j)
    time = np.array(times_list[i][:j])
    A = ATP_curve_list[i][:j]
    #### --------- Without K_eff --------- ####
    idealised_time_constant_K_T = K_T/(gamma*m)
    time_nd_K_T = time/idealised_time_constant_K_T
    A_nd = np.array(A)/K_T
    #### --------- With K_eff --------- ####
    K_eff = K_eff_func(y_o)
    idealised_time_constant_K_eff = (K_T*(1 + (y_o*K_inv)))/(gamma*m)
    time_nd_K_eff = time/idealised_time_constant_K_eff
    A_nd_K_eff = np.array(A)/K_eff
    #### --------- Plots --------- ####
    plot1.line(time_nd_K_T, A_nd + np.log(A_nd), line_color=color, line_dash="dotted")
    plot1.circle(time_nd_K_T, A_nd + np.log(A_nd), size=1, fill_color="white", line_color=color)
    plot1.line(time_nd_K_eff, A_nd_K_eff - y_o/K_eff + np.log(A_nd_K_eff/(y_o/K_eff)),
               line_color=color, line_dash="dotted")
    plot1.circle(time_nd_K_eff, A_nd_K_eff - y_o/K_eff + np.log(A_nd_K_eff/(y_o/K_eff)),
                 size=1, fill_color="white", line_color=color)
    # Ideal curve (45 degree line)
    plot1.line(np.arange(0, np.amax(time_nd_K_eff)), -np.arange(0, np.amax(time_nd_K_eff)),
               line_color="black", line_dash="dotted")
    plot2.line(time_nd_K_T, A_nd - y_o/K_T + np.log(A_nd/(y_o/K_T)),
               line_color=color, line_dash="dotted")
    plot2.circle(time_nd_K_T, A_nd - y_o/K_T + np.log(A_nd/(y_o/K_T)),
                 size=1, fill_color="white", line_color=color)
    # Transformed data against K_eff.
    y3 = A_nd_K_eff - y_o/K_eff + np.log(A_nd_K_eff/(y_o/K_eff))
    plot3.line(time_nd_K_eff, y3, line_color=color, line_dash="dotted")
    plot3.circle(time_nd_K_eff, y3, size=line_size, fill_color="white", line_color=color)
    plot4.line(time, A, line_color=color, line_dash="dotted", line_width=line_size)
    plot4.circle(time, A, size=line_size, fill_color="white", line_color=color)
    plot5.line(time, y3, line_color=color, line_dash="dotted")
    plot5.circle(time, y3, size=line_size, fill_color="white", line_color=color)
    plot6.line(time, np.log(A), line_color=color, line_dash="dotted")
    plot6.circle(time, np.log(A), size=line_size, fill_color="white", line_color=color)
# Add color bar
mapper = linear_cmap(field_name='color', palette=Viridis256, low=0, high=800)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0, 0))
plot1.add_layout(color_bar, 'right')
plot2.add_layout(color_bar, 'right')
plot3.add_layout(color_bar, 'right')
plot4.add_layout(color_bar, 'right')
plot5.add_layout(color_bar, 'right')
# Labels
plot1.xaxis.axis_label = "Scaled Time"
plot1.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_T/K_eff"
plot2.xaxis.axis_label = "Scaled Time"
plot2.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_T"
plot3.xaxis.axis_label = "Scaled Time"
plot3.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_eff"
plot4.xaxis.axis_label = "Time (s)"
plot4.yaxis.axis_label = "ATP (uM)"
plot5.xaxis.axis_label = "Time (s)"
plot5.yaxis.axis_label = "y - y0 + ln(y/y0) non-dimensionalised K_eff"
# Ideal line
plot3.line(np.arange(0, np.amax(time_nd_K_eff)), -np.arange(0, np.amax(time_nd_K_eff)), line_color="black")
# Arrange the plots in a grid layout
grid = gridplot([[plot4, plot6, plot2], [plot5, plot3]])
# Show the grid layout
show(grid)
fitting_information = pd.DataFrame(fitting_information)
# Quick histograms of each field of the sliced dataset. The first histogram
# goes on the current figure; each subsequent one gets its own figure,
# matching the original cell's figure sequence exactly.
plt.hist(data_sliced['atp'])
for field in ('time', 'atp0', 'adp0', 'p0'):
    plt.figure()
    plt.hist(data_sliced[field])
### Prior predictive checks using numpy
# Restrict to curves starting above 100 uM ATP and cut each curve at its
# ATP-floor index (j_list, computed earlier).
high_ATP_curves_indices = np.where(np.array(ATP_conc_list) > 100)[-1]
sliced_ATP = []
sliced_time = []
sliced_ATP0 = []
sliced_ADP0 = []
sliced_P0 = []
for i in high_ATP_curves_indices:
    j = j_list[i]
    sliced_ATP.append(ATP_curve_list[i][:j])
    sliced_time.append(times_list[i][:j])
    sliced_ATP0.append(ATP_conc_list[i])
    sliced_ADP0.append(ADP_conc_list[i])
    sliced_P0.append(P_conc_list[i])
# BUG FIX: the flattening previously re-sliced every curve with the stale
# loop variable j (the LAST curve's cut index); the curves in sliced_* are
# already cut to their own index, so flatten them whole.
flattened_sliced_ATP = [pt for curve in sliced_ATP for pt in curve]
flattened_sliced_time = [pt for curve in sliced_time for pt in curve]
flattened_sliced_ATP0 = [atp0 for i, atp0 in enumerate(sliced_ATP0) for _ in range(len(sliced_ATP[i]))]
flattened_sliced_ADP0 = [adp0 for i, adp0 in enumerate(sliced_ADP0) for _ in range(len(sliced_ATP[i]))]
flattened_sliced_P0 = [p0 for i, p0 in enumerate(sliced_P0) for _ in range(len(sliced_ATP[i]))]
data_sliced = {
    # BUG FIX: N is the number of retained datapoints, not the size of the
    # full unsliced dataset.
    'N': len(flattened_sliced_ATP),  # total number of datapoints
    'atp': flattened_sliced_ATP,
    'time': flattened_sliced_time,
    'atp0': flattened_sliced_ATP0,
    'adp0': flattened_sliced_ADP0,
    'p0': flattened_sliced_P0,
}
### Prior predictive checks using numpy
def y_theoretical_simplified(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
    """Exponential limit of the theoretical ATP curve.

    When keff is much larger than the ATP concentration, the solution
    reduces to a decaying exponential: y = y0 * exp(-t/ktime).
    C1 is unused in this limit but kept for signature parity with
    y_theoretical. (Body indentation, lost in export, reconstructed.)
    """
    ktime = C2 * (1 + ((atp0 + adp0) / KD) + ((atp0 + p0) / KP))
    return atp0 * np.exp(-(time + tau) / ktime)
# Define model
def x_variable(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
# Motor concentration
m = 1;
# Keff calculation
# keff = KT * ( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP ) / ( (1/KT) - (1/KD) - (1/KP) );
keff = C1*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
# Ktime calculation
# ktime = ((1/KT) - (1/KD) - (1/KP)) * keff / ( gamma * m );
ktime = C2*( 1 + ( atp0 + adp0 ) / KD + ( atp0 + p0 ) / KP );
# print('keff', keff)
# print('ktime', ktime)
# Result
# result = np.exp(atp0/keff + np.log(atp0/keff) - (time + tau)/ktime);
result = (atp0/keff)*np.exp((atp0/keff) - (time + tau)/ktime)
# print('x', result)
return result
def y_theoretical(time, atp0, adp0, p0, C1, C2, KD, KP, tau):
    """Closed-form theoretical [ATP](t), inverted via Lambert W.

    The implicit solution (y/keff) * exp(y/keff) = x is solved with the
    principal branch (k=0) of the Lambert W function, so the return value
    is complex-typed (imaginary part ~0 for physical inputs).
    """
    # Same competitive-inhibition factor used inside x_variable.
    inhibition_factor = 1 + (atp0 + adp0) / KD + (atp0 + p0) / KP
    keff = C1 * inhibition_factor
    w_argument = x_variable(time, atp0, adp0, p0, C1, C2, KD, KP, tau)
    return keff * sp.special.lambertw(w_argument, k=0)
# Prior predictive check: draw 1000 parameter sets from the priors, push
# every observed (time, initial-condition) point through the theoretical
# solution, and overlay the resulting predictive ECDFs on the measured data.
theoretical_curves_list = [];
likelihoods_list = [];
p = figure()
for _ in range(1000):
    # Earlier normal priors, kept for reference:
    # C1 = np.random.normal(500, 100);
    # C2 = np.random.normal(500, 100);
    # KD = np.random.normal(500, 100);
    # KP = np.random.normal(500, 100);
    # tau = np.random.normal(500, 100);
    # Flat priors on the lumped constants, dissociation constants, and
    # the time offset.
    C1 = np.random.uniform(low = 10, high = 1000);
    C2 = np.random.uniform(low = 10, high = 1000);
    KD = np.random.uniform(low = 10, high = 1000);
    KP = np.random.uniform(low = 10, high = 1000);
    tau = np.random.uniform(low = 0, high = 1000);
    # Draw data sets out of the likelihood for each set of prior params
    theoretical_atp = np.zeros(len(data_sliced["time"]));
    for i in range(len(data_sliced["time"])):
        time = data_sliced["time"][i];
        atp0 = data_sliced["atp0"][i];
        adp0 = data_sliced["adp0"][i];
        p0 = data_sliced["p0"][i];
        # NOTE(review): y_theoretical returns a complex scalar (Lambert W);
        # assigning it into this float64 array relies on numpy discarding
        # the imaginary part (ComplexWarning on older numpy, TypeError on
        # newer) — confirm intended.
        theoretical_atp[i] = y_theoretical(time, atp0, adp0, p0, C1, C2, KD, KP, tau);
    # Vectorized simplified-model alternative, kept for reference:
    # theoretical_atp = [y_theoretical_simplified(time, atp0, adp0, p0, C1, C2, KD, KP, tau) for time, atp0, adp0, p0 in zip(data_sliced["time"], data_sliced["atp0"], data_sliced["adp0"], data_sliced["p0"])]
    # likelihoods = sp.stats.norm.pdf(data_sliced["atp"], loc=theoretical_atp, scale=1)
    theoretical_curves_list.append(theoretical_atp)
    # plt.figure()
    # plt.scatter(data_sliced["time"], theoretical_atp)
    # iqplot.ecdf(np.array(theoretical_atp), q = "experimental data", p = p, style = "dots")
    # likelihoods_list.append(likelihoods)
# print(len(theoretical_curves_list))
# p = figure()
# NOTE(review): the x-axis label "spindle length (µm)" looks copied from a
# different analysis; these values are ATP concentrations — confirm label.
bebi103.viz.predictive_ecdf(np.array(theoretical_curves_list), x_axis_label="spindle length (µm)", p = p)
# for curve in theoretical_curves_list[0]:
# # print(curve)
# iqplot.ecdf(np.array(curve), q = "experimental data", p = p, style = "dots")
# Overlay the experimental ECDF on the predictive bands for comparison.
iqplot.ecdf(np.array(data_sliced["atp"]), q = "experimental data", p = p, style = "dots")
show(p)
The ECDF suggests that the selected ("nice") high-ATP curves have a bimodal distribution. Let's confirm by looking at the histogram:
# Fine-binned histogram of the sliced ATP measurements, to resolve the
# bimodality suggested by the ECDF above.
plt.hist(data_sliced["atp"], bins = 500);
# NOTE(review): this prints `data["atp"]` (presumably the full data set,
# defined off-screen) rather than the `data_sliced["atp"]` plotted above —
# confirm which is intended.
print(np.array(data["atp"]))
Coding up the Stan model:¶
# Compile the Stan model from file and echo its source for inspection.
sm = cmdstanpy.CmdStanModel(stan_file='estimation.stan')
print(sm.code())
# Prior-predictive model compilation, kept for reference:
# with bebi103.stan.disable_logging():
#     sm_prior_pred = cmdstanpy.CmdStanModel(
#         stan_file="prior_predictive_checks.stan"
#     )
# Sample from the posterior with the default target acceptance rate.
# NOTE(review): this samples with `data` (defined off-screen), not the
# `data_sliced` dict assembled above — confirm which data set is intended.
samples = sm.sample(data=data, show_console = False, adapt_delta=0.8)
# Convert to ArviZ InferenceData and run the standard MCMC diagnostics
# (R-hat, ESS, divergences, tree depth, E-BFMI).
samples = az.from_cmdstanpy(samples)
bebi103.stan.check_all_diagnostics(samples)
# Marginal posterior histograms for each model parameter, shown in a grid.
plots = []
for param in ["C1", "C2", "KD", "KP", "tau"]:
    draws = samples.posterior[param].values.ravel()
    plots.append(
        iqplot.histogram(
            draws,
            q=param,
            rug=False,
            frame_height=200,
            frame_width=250,
        )
    )
bokeh.io.show(bokeh.layouts.gridplot(plots, ncols=4))
# Trace plots for the sampled parameters, followed by a C2-vs-KD joint
# scatter of the posterior draws.
bokeh.io.show(bebi103.viz.trace(samples, parameters=['C1', 'C2', 'KD', 'KP', 'tau']))

c2_draws = samples.posterior.C2.values.flatten()
kd_draws = samples.posterior.KD.values.flatten()
p = figure()
p.circle(
    c2_draws,
    kd_draws,
    color="#fc8d62",
    alpha=0.3,
    legend_label="default sampling",
)
# Clicking the legend entry toggles the glyphs' visibility.
p.legend.click_policy = "hide"
bokeh.io.show(p)